@datagrok/bio 1.7.24 → 1.8.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/detectors.js +7 -4
- package/dist/package-test.js +916 -308
- package/dist/package.js +689 -193
- package/dist/vendors-node_modules_datagrok-libraries_ml_src_workers_dimensionality-reducer_js.js +26 -10
- package/files/samples/sample_HELM_empty_vals.csv +541 -0
- package/package.json +10 -6
- package/{setup-unlink-clean → setup-unlink-clean.sh} +0 -0
- package/{setup → setup.sh} +4 -1
- package/src/__jest__/remote.test.ts +2 -2
- package/src/__jest__/test-node.ts +3 -2
- package/src/package-test.ts +0 -1
- package/src/package.ts +34 -46
- package/src/tests/WebLogo-positions-test.ts +43 -21
- package/src/tests/activity-cliffs-tests.ts +15 -34
- package/src/tests/activity-cliffs-utils.ts +19 -0
- package/src/tests/detectors-test.ts +21 -18
- package/src/tests/renderers-test.ts +12 -7
- package/src/tests/sequence-space-test.ts +17 -12
- package/src/tests/sequence-space-utils.ts +10 -0
- package/src/utils/cell-renderer.ts +123 -127
- package/src/utils/convert.ts +12 -2
- package/src/utils/multiple-sequence-alignment.ts +8 -2
- package/src/viewers/vd-regions-viewer.ts +7 -0
- package/{test-Bio-4f0c8bae6479-367602e1.html → test-Bio-dc07f068a0b2-3cd5a505.html} +6 -9
|
@@ -68,6 +68,7 @@ export async function getBrowserPage(puppeteer: any): Promise<{ browser: any, pa
|
|
|
68
68
|
});
|
|
69
69
|
|
|
70
70
|
const page = await browser.newPage();
|
|
71
|
+
await page.setDefaultNavigationTimeout(0);
|
|
71
72
|
await page.goto(`${url}/oauth/`);
|
|
72
73
|
await page.setCookie({name: 'auth', value: token});
|
|
73
74
|
await page.evaluate((token: any) => {
|
|
@@ -75,8 +76,8 @@ export async function getBrowserPage(puppeteer: any): Promise<{ browser: any, pa
|
|
|
75
76
|
}, token);
|
|
76
77
|
await page.goto(url);
|
|
77
78
|
try {
|
|
78
|
-
await page.waitForSelector('.grok-preloader');
|
|
79
|
-
await page.waitForFunction(() => document.querySelector('.grok-preloader') == null, {timeout:
|
|
79
|
+
await page.waitForSelector('.grok-preloader', { timeout: 1800000 });
|
|
80
|
+
await page.waitForFunction(() => document.querySelector('.grok-preloader') == null, {timeout: 3600000});
|
|
80
81
|
} catch (error) {
|
|
81
82
|
throw error;
|
|
82
83
|
}
|
package/src/package-test.ts
CHANGED
|
@@ -22,7 +22,6 @@ export {tests};
|
|
|
22
22
|
//input: string category {optional: true}
|
|
23
23
|
//input: string test {optional: true}
|
|
24
24
|
//output: dataframe result
|
|
25
|
-
//top-menu: Tools | Dev | JS API Tests
|
|
26
25
|
export async function test(category: string, test: string): Promise<DG.DataFrame> {
|
|
27
26
|
const data = await runTests({category, test});
|
|
28
27
|
return DG.DataFrame.fromObjects(data)!;
|
package/src/package.ts
CHANGED
|
@@ -20,7 +20,6 @@ import {createJsonMonomerLibFromSdf, encodeMonomers, getMolfilesFromSeq, HELM_CO
|
|
|
20
20
|
import {getMacroMol} from './utils/atomic-works';
|
|
21
21
|
import {MacromoleculeSequenceCellRenderer} from './utils/cell-renderer';
|
|
22
22
|
import {convert} from './utils/convert';
|
|
23
|
-
import {lru} from './utils/cell-renderer';
|
|
24
23
|
import {representationsWidget} from './widgets/representations';
|
|
25
24
|
import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
26
25
|
import {FastaFileHandler} from '@datagrok-libraries/bio/src/utils/fasta-handler';
|
|
@@ -28,44 +27,39 @@ import {removeEmptyStringRows} from '@datagrok-libraries/utils/src/dataframe-uti
|
|
|
28
27
|
|
|
29
28
|
|
|
30
29
|
//tags: init
|
|
31
|
-
export async function initBio()
|
|
32
|
-
// apparently HELMWebEditor requires dojo to be initialized first
|
|
33
|
-
const funcList: DG.Func[] = DG.Func.find({package: 'Helm', name: 'initHelm'});
|
|
34
|
-
console.debug(`Bio: initBio() funcList.length = ${funcList.length}`);
|
|
35
|
-
if (funcList.length === 1)
|
|
36
|
-
await grok.functions.call('Helm:initHelp');
|
|
37
|
-
|
|
38
|
-
return new Promise((resolve, reject) => {
|
|
39
|
-
// @ts-ignore
|
|
40
|
-
dojo.ready(function() { resolve(null); });
|
|
41
|
-
});
|
|
30
|
+
export async function initBio() {
|
|
42
31
|
}
|
|
43
32
|
|
|
44
|
-
//name:
|
|
45
|
-
//
|
|
46
|
-
|
|
47
|
-
|
|
33
|
+
//name: fastaSequenceCellRenderer
|
|
34
|
+
//tags: cellRenderer
|
|
35
|
+
//meta.cellType: Sequence
|
|
36
|
+
//meta.columnTags: units=fasta
|
|
37
|
+
//output: grid_cell_renderer result
|
|
38
|
+
export function fastaSequenceCellRenderer(): MacromoleculeSequenceCellRenderer {
|
|
39
|
+
return new MacromoleculeSequenceCellRenderer();
|
|
48
40
|
}
|
|
49
41
|
|
|
50
|
-
//name:
|
|
42
|
+
//name: separatorSequenceCellRenderer
|
|
51
43
|
//tags: cellRenderer
|
|
52
|
-
//meta.cellType:
|
|
44
|
+
//meta.cellType: Sequence
|
|
45
|
+
//meta.columnTags: units=separator
|
|
53
46
|
//output: grid_cell_renderer result
|
|
54
|
-
export function
|
|
47
|
+
export function separatorSequenceCellRenderer(): MacromoleculeSequenceCellRenderer {
|
|
55
48
|
return new MacromoleculeSequenceCellRenderer();
|
|
56
49
|
}
|
|
57
50
|
|
|
58
51
|
function checkInputColumn(col: DG.Column, name: string,
|
|
59
52
|
allowedNotations: string[] = [], allowedAlphabets: string[] = []): boolean {
|
|
60
|
-
const
|
|
53
|
+
const notation: string = col.getTag(DG.TAGS.UNITS);
|
|
54
|
+
const alphabet: string = col.getTag('alphabet')
|
|
61
55
|
if (col.semType !== DG.SEMTYPE.MACROMOLECULE) {
|
|
62
56
|
grok.shell.warning(name + ' analysis is allowed for Macromolecules semantic type');
|
|
63
57
|
return false;
|
|
64
58
|
} else if (
|
|
65
59
|
(allowedAlphabets.length > 0 &&
|
|
66
|
-
!allowedAlphabets.some((a) =>
|
|
60
|
+
!allowedAlphabets.some((a) => alphabet.toUpperCase() == (a.toUpperCase()))) ||
|
|
67
61
|
(allowedNotations.length > 0 &&
|
|
68
|
-
!allowedNotations.some((n) =>
|
|
62
|
+
!allowedNotations.some((n) => notation.toUpperCase() == (n.toUpperCase())))
|
|
69
63
|
) {
|
|
70
64
|
const notationAdd = allowedNotations.length == 0 ? 'any notation' :
|
|
71
65
|
(`notation${allowedNotations.length > 1 ? 's' : ''} ${allowedNotations.map((n) => `"${n}"`).join(', ')} `);
|
|
@@ -118,7 +112,7 @@ export function vdRegionViewer() {
|
|
|
118
112
|
//input: double similarity = 80 [Similarity cutoff]
|
|
119
113
|
//input: string methodName { choices:["UMAP", "t-SNE", "SPE"] }
|
|
120
114
|
export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column, activities: DG.Column,
|
|
121
|
-
similarity: number, methodName: string): Promise<
|
|
115
|
+
similarity: number, methodName: string): Promise<DG.Viewer | undefined> {
|
|
122
116
|
if (!checkInputColumn(macroMolecule, 'Activity Cliffs'))
|
|
123
117
|
return;
|
|
124
118
|
const encodedCol = encodeMonomers(macroMolecule);
|
|
@@ -128,8 +122,13 @@ export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column,
|
|
|
128
122
|
const options = {
|
|
129
123
|
'SPE': {cycles: 2000, lambda: 1.0, dlambda: 0.0005},
|
|
130
124
|
};
|
|
131
|
-
const
|
|
132
|
-
|
|
125
|
+
const tags = {
|
|
126
|
+
'units': macroMolecule.tags['units'],
|
|
127
|
+
'aligned': macroMolecule.tags['aligned'],
|
|
128
|
+
'separator': macroMolecule.tags['separator'],
|
|
129
|
+
'alphabet': macroMolecule.tags['alphabet'],
|
|
130
|
+
}
|
|
131
|
+
const sp = await getActivityCliffs(
|
|
133
132
|
df,
|
|
134
133
|
macroMolecule,
|
|
135
134
|
encodedCol,
|
|
@@ -140,11 +139,12 @@ export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column,
|
|
|
140
139
|
'Levenshtein',
|
|
141
140
|
methodName,
|
|
142
141
|
DG.SEMTYPE.MACROMOLECULE,
|
|
143
|
-
|
|
142
|
+
tags,
|
|
144
143
|
sequenceSpace,
|
|
145
144
|
sequenceGetSimilarities,
|
|
146
145
|
drawTooltip,
|
|
147
146
|
(options as any)[methodName]);
|
|
147
|
+
return sp;
|
|
148
148
|
}
|
|
149
149
|
|
|
150
150
|
//top-menu: Bio | Sequence Space...
|
|
@@ -155,7 +155,7 @@ export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column,
|
|
|
155
155
|
//input: string similarityMetric { choices:["Levenshtein", "Tanimoto"] }
|
|
156
156
|
//input: bool plotEmbeddings = true
|
|
157
157
|
export async function sequenceSpaceTopMenu(table: DG.DataFrame, macroMolecule: DG.Column, methodName: string,
|
|
158
|
-
similarityMetric: string = 'Levenshtein', plotEmbeddings: boolean): Promise<
|
|
158
|
+
similarityMetric: string = 'Levenshtein', plotEmbeddings: boolean): Promise<DG.Viewer|undefined> {
|
|
159
159
|
if (!checkInputColumn(macroMolecule, 'Activity Cliffs'))
|
|
160
160
|
return;
|
|
161
161
|
const encodedCol = encodeMonomers(macroMolecule);
|
|
@@ -176,14 +176,16 @@ export async function sequenceSpaceTopMenu(table: DG.DataFrame, macroMolecule: D
|
|
|
176
176
|
for (const col of embeddings) {
|
|
177
177
|
const listValues = col.toList();
|
|
178
178
|
emptyValsIdxs.forEach((ind: number) => listValues.splice(ind, 0, null));
|
|
179
|
-
table.columns.add(DG.Column.
|
|
180
|
-
}
|
|
179
|
+
table.columns.add(DG.Column.fromList('double', col.name, listValues));
|
|
180
|
+
}
|
|
181
|
+
let sp;
|
|
181
182
|
if (plotEmbeddings) {
|
|
182
183
|
for (const v of grok.shell.views) {
|
|
183
184
|
if (v.name === table.name)
|
|
184
|
-
(v as DG.TableView).scatterPlot({x: embedColsNames[0], y: embedColsNames[1], title: 'Sequence space'});
|
|
185
|
+
sp = (v as DG.TableView).scatterPlot({x: embedColsNames[0], y: embedColsNames[1], title: 'Sequence space'});
|
|
185
186
|
}
|
|
186
187
|
}
|
|
188
|
+
return sp;
|
|
187
189
|
};
|
|
188
190
|
|
|
189
191
|
//top-menu: Bio | To Atomic Level...
|
|
@@ -199,22 +201,6 @@ export async function toAtomicLevel(df: DG.DataFrame, macroMolecule: DG.Column):
|
|
|
199
201
|
if (!checkInputColumn(macroMolecule, 'To Atomic Level'))
|
|
200
202
|
return;
|
|
201
203
|
|
|
202
|
-
let currentView: DG.TableView;
|
|
203
|
-
for (const view of grok.shell.tableViews) {
|
|
204
|
-
if (df.name === view.name)
|
|
205
|
-
currentView = view;
|
|
206
|
-
}
|
|
207
|
-
|
|
208
|
-
// Some hack to activate Chem Molecule rendering
|
|
209
|
-
const file2 = await _package.files.readAsText('tests/sar-small.csv');
|
|
210
|
-
const df2 = DG.DataFrame.fromCsv(file2);
|
|
211
|
-
const v2 = grok.shell.addTableView(df2);
|
|
212
|
-
setTimeout(() => {
|
|
213
|
-
grok.shell.closeTable(df2);
|
|
214
|
-
v2.close();
|
|
215
|
-
grok.shell.v = currentView;
|
|
216
|
-
}, 100);
|
|
217
|
-
|
|
218
204
|
const monomersLibFile = await _package.files.readAsText(HELM_CORE_LIB_FILENAME);
|
|
219
205
|
const monomersLibObject: any[] = JSON.parse(monomersLibFile);
|
|
220
206
|
const atomicCodes = getMolfilesFromSeq(macroMolecule, monomersLibObject);
|
|
@@ -224,6 +210,7 @@ export async function toAtomicLevel(df: DG.DataFrame, macroMolecule: DG.Column):
|
|
|
224
210
|
col.semType = DG.SEMTYPE.MOLECULE;
|
|
225
211
|
col.tags[DG.TAGS.UNITS] = 'molblock';
|
|
226
212
|
df.columns.add(col, true);
|
|
213
|
+
await grok.data.detectSemanticTypes(df);
|
|
227
214
|
}
|
|
228
215
|
|
|
229
216
|
|
|
@@ -415,3 +402,4 @@ export async function testDetectMacromolecule(path: string): Promise<DG.DataFram
|
|
|
415
402
|
resDf.name = `datasets_detectMacromolecule_${path}`;
|
|
416
403
|
return resDf;
|
|
417
404
|
}
|
|
405
|
+
|
|
@@ -1,10 +1,11 @@
|
|
|
1
|
-
|
|
2
1
|
import {after, before, category, test, expect, expectObject} from '@datagrok-libraries/utils/src/test';
|
|
3
2
|
|
|
4
3
|
import * as grok from 'datagrok-api/grok';
|
|
5
4
|
import * as ui from 'datagrok-api/ui';
|
|
6
5
|
import * as DG from 'datagrok-api/dg';
|
|
7
6
|
import {PositionInfo, PositionMonomerInfo, WebLogo} from '@datagrok-libraries/bio/src/viewers/web-logo';
|
|
7
|
+
import {Column} from 'datagrok-api/dg';
|
|
8
|
+
|
|
8
9
|
category('WebLogo-positions', () => {
|
|
9
10
|
let tvList: DG.TableView[];
|
|
10
11
|
let dfList: DG.DataFrame[];
|
|
@@ -18,8 +19,6 @@ category('WebLogo-positions', () => {
|
|
|
18
19
|
-TC-GCTTGC--`;
|
|
19
20
|
|
|
20
21
|
|
|
21
|
-
const resShrinkEmptyTailDf1: PositionInfo[] = [];
|
|
22
|
-
|
|
23
22
|
before(async () => {
|
|
24
23
|
tvList = [];
|
|
25
24
|
dfList = [];
|
|
@@ -35,6 +34,9 @@ category('WebLogo-positions', () => {
|
|
|
35
34
|
const df: DG.DataFrame = DG.DataFrame.fromCsv(csvDf1);
|
|
36
35
|
const tv: DG.TableView = grok.shell.addTableView(df);
|
|
37
36
|
|
|
37
|
+
df.getCol('seq').semType = 'Macromolecule';
|
|
38
|
+
df.getCol('seq').setTag('units', 'fasta:SEQ.MSA:DNA');
|
|
39
|
+
|
|
38
40
|
const wlViewer: WebLogo = await df.plot.fromType('WebLogo') as unknown as WebLogo;
|
|
39
41
|
tv.dockManager.dock(wlViewer.root, DG.DOCK_TYPE.DOWN);
|
|
40
42
|
|
|
@@ -57,9 +59,9 @@ category('WebLogo-positions', () => {
|
|
|
57
59
|
new PositionInfo('11', {'-': new PositionMonomerInfo(5)}),
|
|
58
60
|
new PositionInfo('12', {'-': new PositionMonomerInfo(5)})
|
|
59
61
|
];
|
|
60
|
-
|
|
61
|
-
expect(positions.length,resAllDf1.length);
|
|
62
|
-
|
|
62
|
+
|
|
63
|
+
expect(positions.length, resAllDf1.length);
|
|
64
|
+
|
|
63
65
|
for (let i = 0; i < positions.length; i++) {
|
|
64
66
|
expect(positions[i].name, resAllDf1[i].name);
|
|
65
67
|
for (const key in positions[i].freq) {
|
|
@@ -68,11 +70,27 @@ category('WebLogo-positions', () => {
|
|
|
68
70
|
}
|
|
69
71
|
|
|
70
72
|
});
|
|
71
|
-
test('positions with shrinkEmptyTail option', async () => {
|
|
72
|
-
|
|
73
|
+
test('positions with shrinkEmptyTail option true (filterd)', async () => {
|
|
74
|
+
let csvDf2 = `seq
|
|
75
|
+
-TC-G-TTGC--
|
|
76
|
+
-TC-GCTTGC--
|
|
77
|
+
-T--C-GT-
|
|
78
|
+
-T--C-GT-
|
|
79
|
+
-T--C-GT-
|
|
80
|
+
-T--CCGT-`;
|
|
81
|
+
const df: DG.DataFrame = DG.DataFrame.fromCsv(csvDf2);
|
|
73
82
|
const tv: DG.TableView = grok.shell.addTableView(df);
|
|
74
83
|
|
|
84
|
+
df.getCol('seq').semType = 'Macromolecule';
|
|
85
|
+
df.getCol('seq').setTag('units', 'fasta:SEQ.MSA:DNA');
|
|
86
|
+
|
|
87
|
+
let seq: Column = df.getCol('seq');
|
|
88
|
+
df.filter.init((i) => {
|
|
89
|
+
return i > 2;
|
|
90
|
+
});
|
|
91
|
+
df.filter.fireChanged();
|
|
75
92
|
const wlViewer: WebLogo = await df.plot.fromType('WebLogo', {'shrinkEmptyTail': true}) as unknown as WebLogo;
|
|
93
|
+
|
|
76
94
|
tv.dockManager.dock(wlViewer.root, DG.DOCK_TYPE.DOWN);
|
|
77
95
|
|
|
78
96
|
tvList.push(tv);
|
|
@@ -81,19 +99,19 @@ category('WebLogo-positions', () => {
|
|
|
81
99
|
const positions: PositionInfo[] = wlViewer['positions'];
|
|
82
100
|
|
|
83
101
|
const resAllDf1: PositionInfo[] = [
|
|
84
|
-
new PositionInfo('1', {'
|
|
85
|
-
new PositionInfo('2', {'T': new PositionMonomerInfo(
|
|
86
|
-
new PositionInfo('3', {'
|
|
87
|
-
new PositionInfo('4', {'-': new PositionMonomerInfo(
|
|
88
|
-
new PositionInfo('5', {'
|
|
89
|
-
new PositionInfo('6', {'-': new PositionMonomerInfo(
|
|
90
|
-
new PositionInfo('7', {'
|
|
91
|
-
new PositionInfo('8', {'T': new PositionMonomerInfo(
|
|
92
|
-
new PositionInfo('9', {'
|
|
93
|
-
new PositionInfo('10', {'C': new PositionMonomerInfo(5)})
|
|
102
|
+
new PositionInfo('1', {'-': new PositionMonomerInfo(3)}),
|
|
103
|
+
new PositionInfo('2', {'T': new PositionMonomerInfo(3)}),
|
|
104
|
+
new PositionInfo('3', {'-': new PositionMonomerInfo(3)}),
|
|
105
|
+
new PositionInfo('4', {'-': new PositionMonomerInfo(3)}),
|
|
106
|
+
new PositionInfo('5', {'C': new PositionMonomerInfo(3)}),
|
|
107
|
+
new PositionInfo('6', {'-': new PositionMonomerInfo(2), 'C': new PositionMonomerInfo(1)}),
|
|
108
|
+
new PositionInfo('7', {'G': new PositionMonomerInfo(3)}),
|
|
109
|
+
new PositionInfo('8', {'T': new PositionMonomerInfo(3)}),
|
|
110
|
+
new PositionInfo('9', {'-': new PositionMonomerInfo(3)}),
|
|
94
111
|
];
|
|
95
112
|
|
|
96
|
-
|
|
113
|
+
expect(positions.length, resAllDf1.length);
|
|
114
|
+
|
|
97
115
|
for (let i = 0; i < positions.length; i++) {
|
|
98
116
|
expect(positions[i].name, resAllDf1[i].name);
|
|
99
117
|
for (const key in positions[i].freq) {
|
|
@@ -107,7 +125,10 @@ category('WebLogo-positions', () => {
|
|
|
107
125
|
const df: DG.DataFrame = DG.DataFrame.fromCsv(csvDf1);
|
|
108
126
|
const tv: DG.TableView = grok.shell.addTableView(df);
|
|
109
127
|
|
|
110
|
-
|
|
128
|
+
df.getCol('seq').semType = 'Macromolecule';
|
|
129
|
+
df.getCol('seq').setTag('units', 'fasta:SEQ.MSA:DNA');
|
|
130
|
+
|
|
131
|
+
const wlViewer: WebLogo = await df.plot.fromType('WebLogo', {'skipEmptyPositions': true}) as unknown as WebLogo;
|
|
111
132
|
tv.dockManager.dock(wlViewer.root, DG.DOCK_TYPE.DOWN);
|
|
112
133
|
|
|
113
134
|
tvList.push(tv);
|
|
@@ -126,7 +147,8 @@ category('WebLogo-positions', () => {
|
|
|
126
147
|
new PositionInfo('9', {'G': new PositionMonomerInfo(5)}),
|
|
127
148
|
new PositionInfo('10', {'C': new PositionMonomerInfo(5)})
|
|
128
149
|
];
|
|
129
|
-
|
|
150
|
+
|
|
151
|
+
expect(positions.length, resAllDf1.length);
|
|
130
152
|
|
|
131
153
|
for (let i = 0; i < positions.length; i++) {
|
|
132
154
|
expect(positions[i].name, resAllDf1[i].name);
|
|
@@ -1,58 +1,39 @@
|
|
|
1
|
-
import {after, before, category,
|
|
1
|
+
import {after, before, category, test} from '@datagrok-libraries/utils/src/test';
|
|
2
2
|
|
|
3
3
|
import * as DG from 'datagrok-api/dg';
|
|
4
4
|
import * as grok from 'datagrok-api/grok';
|
|
5
5
|
|
|
6
6
|
import {readDataframe} from './utils';
|
|
7
|
-
import {
|
|
8
|
-
import {drawTooltip, sequenceGetSimilarities} from '../utils/sequence-activity-cliffs';
|
|
9
|
-
import {getActivityCliffs} from '@datagrok-libraries/ml/src/viewers/activity-cliffs';
|
|
10
|
-
import {encodeMonomers} from '../utils/utils';
|
|
7
|
+
import { _testActivityCliffsOpen } from './activity-cliffs-utils';
|
|
11
8
|
|
|
12
9
|
|
|
13
10
|
category('activityCliffs', async () => {
|
|
14
11
|
let actCliffsTableView: DG.TableView;
|
|
15
12
|
let actCliffsDf: DG.DataFrame;
|
|
13
|
+
let actCliffsTableViewWithEmptyRows: DG.TableView;
|
|
14
|
+
let actCliffsDfWithEmptyRows: DG.DataFrame;
|
|
15
|
+
|
|
16
16
|
|
|
17
17
|
before(async () => {
|
|
18
18
|
actCliffsDf = await readDataframe('samples/sample_MSA.csv');
|
|
19
19
|
actCliffsTableView = grok.shell.addTableView(actCliffsDf);
|
|
20
|
-
|
|
21
|
-
|
|
20
|
+
actCliffsDfWithEmptyRows = await readDataframe('samples/sample_HELM_empty_vals.csv');
|
|
21
|
+
actCliffsTableViewWithEmptyRows = grok.shell.addTableView(actCliffsDfWithEmptyRows);
|
|
22
22
|
});
|
|
23
23
|
|
|
24
24
|
after(async () => {
|
|
25
25
|
grok.shell.closeTable(actCliffsDf);
|
|
26
26
|
actCliffsTableView.close();
|
|
27
|
+
grok.shell.closeTable(actCliffsDfWithEmptyRows);
|
|
28
|
+
actCliffsTableViewWithEmptyRows.close();
|
|
27
29
|
});
|
|
28
30
|
|
|
29
31
|
test('activityCliffsOpen', async () => {
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
const encodedCol = encodeMonomers(actCliffsDf.col('MSA')!) as DG.Column;
|
|
36
|
-
const scatterPlot = await getActivityCliffs(
|
|
37
|
-
actCliffsDf,
|
|
38
|
-
actCliffsDf.col('MSA')!,
|
|
39
|
-
encodedCol,
|
|
40
|
-
axesNames,
|
|
41
|
-
'Activity cliffs',
|
|
42
|
-
actCliffsDf.col('Activity')!,
|
|
43
|
-
50,
|
|
44
|
-
'Levenshtein',
|
|
45
|
-
't-SNE',
|
|
46
|
-
DG.SEMTYPE.MACROMOLECULE,
|
|
47
|
-
units,
|
|
48
|
-
sequenceSpace,
|
|
49
|
-
sequenceGetSimilarities,
|
|
50
|
-
drawTooltip);
|
|
51
|
-
|
|
52
|
-
expect(scatterPlot != null, true);
|
|
53
|
-
|
|
54
|
-
const cliffsLink = (Array.from(scatterPlot.root.children) as Element[])
|
|
55
|
-
.filter((it) => it.className === 'ui-btn ui-btn-ok');
|
|
56
|
-
expect((cliffsLink[0] as HTMLElement).innerText, '2362 cliffs');
|
|
32
|
+
await _testActivityCliffsOpen(actCliffsDf, 53, 'UMAP', 'MSA');
|
|
33
|
+
});
|
|
34
|
+
|
|
35
|
+
test('activityCliffsOpenWithEmptyRows', async () => {
|
|
36
|
+
await _testActivityCliffsOpen(actCliffsDfWithEmptyRows, 53, 'UMAP', 'HELM');
|
|
57
37
|
});
|
|
38
|
+
|
|
58
39
|
});
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
import * as DG from 'datagrok-api/dg';
|
|
2
|
+
import {delay, expect} from '@datagrok-libraries/utils/src/test';
|
|
3
|
+
import {_package} from '../package-test';
|
|
4
|
+
import { activityCliffs } from '../package';
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
export async function _testActivityCliffsOpen(df: DG.DataFrame, numberCliffs: number, method: string, colName: string) {
|
|
8
|
+
const scatterPlot = await activityCliffs(
|
|
9
|
+
df,
|
|
10
|
+
df.col(colName)!,
|
|
11
|
+
df.col('Activity')!,
|
|
12
|
+
80,
|
|
13
|
+
method);
|
|
14
|
+
|
|
15
|
+
expect(scatterPlot != null, true);
|
|
16
|
+
|
|
17
|
+
const cliffsLink = Array.from(scatterPlot!.root.children).filter(it => it.className === 'ui-btn ui-btn-ok');
|
|
18
|
+
expect((cliffsLink[0] as HTMLElement).innerText, `${numberCliffs} cliffs`);
|
|
19
|
+
}
|
|
@@ -190,6 +190,7 @@ MWRSWY-CKHP
|
|
|
190
190
|
};
|
|
191
191
|
};
|
|
192
192
|
|
|
193
|
+
|
|
193
194
|
test('NegativeEmpty', async () => { await _testNeg(readCsv('csvDfEmpty', csvDfEmpty), 'col1'); });
|
|
194
195
|
test('Negative1', async () => { await _testNeg(readCsv('csvDf1', csvDf1), 'col1'); });
|
|
195
196
|
test('Negative2', async () => { await _testNeg(readCsv('csvDf2', csvDf2), 'col1'); });
|
|
@@ -197,44 +198,44 @@ MWRSWY-CKHP
|
|
|
197
198
|
test('NegativeSmiles', async () => { await _testNeg(readCsv('csvDfSmiles', csvDfSmiles), 'col1'); });
|
|
198
199
|
|
|
199
200
|
test('Dna1', async () => {
|
|
200
|
-
await _testPos(readCsv('csvDfDna1', csvDfDna1), 'seq', 'fasta
|
|
201
|
+
await _testPos(readCsv('csvDfDna1', csvDfDna1), 'seq', 'fasta', 'SEQ', 'DNA');
|
|
201
202
|
});
|
|
202
203
|
test('Rna1', async () => {
|
|
203
|
-
await _testPos(readCsv('csvDfRna1', csvDfRna1), 'seq', 'fasta
|
|
204
|
+
await _testPos(readCsv('csvDfRna1', csvDfRna1), 'seq', 'fasta', 'SEQ', 'RNA');
|
|
204
205
|
});
|
|
205
206
|
test('AA1', async () => {
|
|
206
|
-
await _testPos(readCsv('csvDfPt1', csvDfPt1), 'seq', 'fasta
|
|
207
|
+
await _testPos(readCsv('csvDfPt1', csvDfPt1), 'seq', 'fasta', 'SEQ', 'PT');
|
|
207
208
|
});
|
|
208
209
|
test('MsaDna1', async () => {
|
|
209
|
-
await _testPos(readCsv('csvDfMsaDna1', csvDfMsaDna1), 'seq', 'fasta
|
|
210
|
+
await _testPos(readCsv('csvDfMsaDna1', csvDfMsaDna1), 'seq', 'fasta', 'SEQ.MSA', 'DNA');
|
|
210
211
|
});
|
|
211
212
|
|
|
212
213
|
test('MsaAA1', async () => {
|
|
213
|
-
await _testPos(readCsv('csvDfMsaPt1', csvDfMsaPt1), 'seq', 'fasta
|
|
214
|
+
await _testPos(readCsv('csvDfMsaPt1', csvDfMsaPt1), 'seq', 'fasta', 'SEQ.MSA', 'PT');
|
|
214
215
|
});
|
|
215
216
|
|
|
216
217
|
test('SepDna', async () => {
|
|
217
|
-
await _testPos(readCsv('csvDfSepDna', csvDfSepDna), 'seq', 'separator
|
|
218
|
+
await _testPos(readCsv('csvDfSepDna', csvDfSepDna), 'seq', 'separator', 'SEQ', 'DNA', '*');
|
|
218
219
|
});
|
|
219
220
|
test('SepRna', async () => {
|
|
220
|
-
await _testPos(readCsv('csvDfSepRna', csvDfSepRna), 'seq', 'separator
|
|
221
|
+
await _testPos(readCsv('csvDfSepRna', csvDfSepRna), 'seq', 'separator', 'SEQ', 'RNA', '*');
|
|
221
222
|
});
|
|
222
223
|
test('SepPt', async () => {
|
|
223
|
-
await _testPos(readCsv('csvDfSepPt', csvDfSepPt), 'seq', 'separator
|
|
224
|
+
await _testPos(readCsv('csvDfSepPt', csvDfSepPt), 'seq', 'separator', 'SEQ', 'PT', '-');
|
|
224
225
|
});
|
|
225
226
|
test('SepUn1', async () => {
|
|
226
|
-
await _testPos(readCsv('csvDfSepUn1', csvDfSepUn1), 'seq', 'separator
|
|
227
|
+
await _testPos(readCsv('csvDfSepUn1', csvDfSepUn1), 'seq', 'separator', 'SEQ', 'UN', '-');
|
|
227
228
|
});
|
|
228
229
|
test('SepUn2', async () => {
|
|
229
|
-
await _testPos(readCsv('csvDfSepUn2', csvDfSepUn2), 'seq', 'separator
|
|
230
|
+
await _testPos(readCsv('csvDfSepUn2', csvDfSepUn2), 'seq', 'separator', 'SEQ', 'UN', '/');
|
|
230
231
|
});
|
|
231
232
|
|
|
232
233
|
test('SepMsaN1', async () => {
|
|
233
|
-
await _testPos(readCsv('csvDfSepMsaDna1', csvDfSepMsaDna1), 'seq', 'separator
|
|
234
|
+
await _testPos(readCsv('csvDfSepMsaDna1', csvDfSepMsaDna1), 'seq', 'separator', 'SEQ.MSA', 'DNA', '-');
|
|
234
235
|
});
|
|
235
236
|
|
|
236
237
|
test('SamplesFastaCsvPt', async () => {
|
|
237
|
-
await _testPos(readSamples(Samples.fastaCsv), 'sequence', 'fasta
|
|
238
|
+
await _testPos(readSamples(Samples.fastaCsv), 'sequence', 'fasta', 'SEQ', 'PT');
|
|
238
239
|
});
|
|
239
240
|
test('SamplesFastaCsvNegativeEntry', async () => {
|
|
240
241
|
await _testNeg(readSamples(Samples.fastaCsv), 'Entry');
|
|
@@ -247,7 +248,7 @@ MWRSWY-CKHP
|
|
|
247
248
|
});
|
|
248
249
|
|
|
249
250
|
test('SamplesFastaFastaPt', async () => {
|
|
250
|
-
await _testPos(readSamples(Samples.fastaFasta, readFileFasta), 'sequence', 'fasta
|
|
251
|
+
await _testPos(readSamples(Samples.fastaFasta, readFileFasta), 'sequence', 'fasta', 'SEQ', 'PT');
|
|
251
252
|
});
|
|
252
253
|
|
|
253
254
|
// peptidesComplex contains monomers with spaces in AlignedSequence columns, which are forbidden
|
|
@@ -265,7 +266,7 @@ MWRSWY-CKHP
|
|
|
265
266
|
});
|
|
266
267
|
|
|
267
268
|
test('samplesMsaComplexUn', async () => {
|
|
268
|
-
await _testPos(readSamples(Samples.msaComplex), 'MSA', 'separator
|
|
269
|
+
await _testPos(readSamples(Samples.msaComplex), 'MSA', 'separator', 'SEQ.MSA', 'UN', '/');
|
|
269
270
|
});
|
|
270
271
|
test('samplesMsaComplexNegativeActivity', async () => {
|
|
271
272
|
await _testNeg(readSamples(Samples.msaComplex), 'Activity');
|
|
@@ -280,7 +281,7 @@ MWRSWY-CKHP
|
|
|
280
281
|
});
|
|
281
282
|
|
|
282
283
|
test('samplesHelmCsvHELM', async () => {
|
|
283
|
-
await _testPos(readSamples(Samples.helmCsv), 'HELM', '
|
|
284
|
+
await _testPos(readSamples(Samples.helmCsv), 'HELM', 'helm', null, null, null);
|
|
284
285
|
});
|
|
285
286
|
|
|
286
287
|
test('samplesHelmCsvNegativeActivity', async () => {
|
|
@@ -296,7 +297,7 @@ MWRSWY-CKHP
|
|
|
296
297
|
await _testNeg(readSamples(Samples.testHelmCsv), 'Test type');
|
|
297
298
|
});
|
|
298
299
|
test('samplesTestHelmPositiveHelmString', async () => {
|
|
299
|
-
await _testPos(readSamples(Samples.testHelmCsv), 'HELM string', '
|
|
300
|
+
await _testPos(readSamples(Samples.testHelmCsv), 'HELM string', 'helm', null, null, null);
|
|
300
301
|
});
|
|
301
302
|
test('samplesTestHelmNegativeValid', async () => {
|
|
302
303
|
await _testNeg(readSamples(Samples.testHelmCsv), 'Valid?');
|
|
@@ -328,7 +329,7 @@ MWRSWY-CKHP
|
|
|
328
329
|
});
|
|
329
330
|
|
|
330
331
|
test('samplesFastaPtPosSequence', async () => {
|
|
331
|
-
await _testPos(readSamples(Samples.fastaPtCsv), 'sequence', 'fasta
|
|
332
|
+
await _testPos(readSamples(Samples.fastaPtCsv), 'sequence', 'fasta', 'SEQ', 'PT');
|
|
332
333
|
});
|
|
333
334
|
|
|
334
335
|
test('samplesTestCerealNegativeCerealName', async () => {
|
|
@@ -383,7 +384,7 @@ export async function _testNeg(readDf: DfReaderFunc, colName: string) {
|
|
|
383
384
|
}
|
|
384
385
|
}
|
|
385
386
|
|
|
386
|
-
export async function _testPos(readDf: DfReaderFunc, colName: string, units: string, separator: string | null = null) {
|
|
387
|
+
export async function _testPos(readDf: DfReaderFunc, colName: string, units: string, aligned: string | null, alphabet: string | null, separator: string | null = null) {
|
|
387
388
|
const df: DG.DataFrame = await readDf();
|
|
388
389
|
const col: DG.Column = df.col(colName)!;
|
|
389
390
|
const semType: string = await grok.functions.call('Bio:detectMacromolecule', {col: col});
|
|
@@ -392,6 +393,8 @@ export async function _testPos(readDf: DfReaderFunc, colName: string, units: str
|
|
|
392
393
|
|
|
393
394
|
expect(col.semType === DG.SEMTYPE.MACROMOLECULE, true);
|
|
394
395
|
expect(col.getTag(DG.TAGS.UNITS), units);
|
|
396
|
+
expect(col.getTag('aligned'), aligned);
|
|
397
|
+
expect(col.getTag('alphabet'), alphabet);
|
|
395
398
|
if (separator)
|
|
396
399
|
expect(col.getTag('separator'), separator);
|
|
397
400
|
}
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import {after, before, category, expect, test} from '@datagrok-libraries/utils/src/test';
|
|
1
|
+
import {after, before, category, delay, expect, test} from '@datagrok-libraries/utils/src/test';
|
|
2
2
|
|
|
3
3
|
import * as grok from 'datagrok-api/grok';
|
|
4
4
|
import * as DG from 'datagrok-api/dg';
|
|
@@ -12,6 +12,7 @@ category('renderers', () => {
|
|
|
12
12
|
let dfList: DG.DataFrame[];
|
|
13
13
|
|
|
14
14
|
before(async () => {
|
|
15
|
+
await grok.functions.call('Bio:initBio');
|
|
15
16
|
tvList = [];
|
|
16
17
|
dfList = [];
|
|
17
18
|
});
|
|
@@ -43,15 +44,19 @@ category('renderers', () => {
|
|
|
43
44
|
`semType="${srcSeqCol!.semType}", units="${srcSeqCol!.getTag(DG.TAGS.UNITS)}", ` +
|
|
44
45
|
`cell.renderer="${srcSeqCol!.getTag('cell.renderer')}"`);
|
|
45
46
|
expect(srcSeqCol!.semType, DG.SEMTYPE.MACROMOLECULE);
|
|
46
|
-
expect(srcSeqCol!.getTag(DG.TAGS.UNITS), 'fasta
|
|
47
|
-
expect(srcSeqCol!.getTag('
|
|
47
|
+
expect(srcSeqCol!.getTag(DG.TAGS.UNITS), 'fasta');
|
|
48
|
+
expect(srcSeqCol!.getTag('aligned'), 'SEQ');
|
|
49
|
+
expect(srcSeqCol!.getTag('alphabet'), 'PT');
|
|
50
|
+
expect(srcSeqCol!.getTag('cell.renderer'), 'sequence');
|
|
48
51
|
|
|
49
52
|
const msaSeqCol: DG.Column | null = await multipleSequenceAlignmentAny(df, srcSeqCol!);
|
|
50
53
|
tv.grid.invalidate();
|
|
51
|
-
|
|
54
|
+
|
|
52
55
|
expect(msaSeqCol!.semType, DG.SEMTYPE.MACROMOLECULE);
|
|
53
|
-
expect(msaSeqCol!.getTag(DG.TAGS.UNITS), 'fasta
|
|
54
|
-
expect(msaSeqCol!.getTag('
|
|
56
|
+
expect(msaSeqCol!.getTag(DG.TAGS.UNITS), 'fasta');
|
|
57
|
+
expect(msaSeqCol!.getTag('aligned'), 'SEQ.MSA');
|
|
58
|
+
expect(msaSeqCol!.getTag('alphabet'), 'PT');
|
|
59
|
+
expect(msaSeqCol!.getTag('cell.renderer'), 'sequence');
|
|
55
60
|
|
|
56
61
|
dfList.push(df);
|
|
57
62
|
tvList.push(tv);
|
|
@@ -65,7 +70,7 @@ category('renderers', () => {
|
|
|
65
70
|
|
|
66
71
|
const srcCol: DG.Column = df.col('sequence')!;
|
|
67
72
|
const tgtCol: DG.Column = await convertDo(srcCol, NOTATION.SEPARATOR, '/');
|
|
68
|
-
expect(tgtCol.getTag('cell.renderer'), '
|
|
73
|
+
expect(tgtCol.getTag('cell.renderer'), 'sequence');
|
|
69
74
|
|
|
70
75
|
tvList.push(tv);
|
|
71
76
|
dfList.push(df);
|
|
@@ -1,30 +1,35 @@
|
|
|
1
|
-
import {after, before, category, test, expect} from '@datagrok-libraries/utils/src/test';
|
|
1
|
+
import {after, before, category, test, expect, delay} from '@datagrok-libraries/utils/src/test';
|
|
2
2
|
import * as DG from 'datagrok-api/dg';
|
|
3
|
-
import {sequenceSpace} from '../utils/sequence-space';
|
|
4
3
|
import {readDataframe} from './utils';
|
|
5
4
|
import * as grok from 'datagrok-api/grok';
|
|
5
|
+
import { _testSequenceSpaceReturnsResult } from './sequence-space-utils';
|
|
6
6
|
|
|
7
7
|
category('sequenceSpace', async () => {
|
|
8
8
|
let testFastaDf: DG.DataFrame;
|
|
9
|
+
let testFastaTableView: DG.TableView;
|
|
10
|
+
let testHelmWithEmptyRows: DG.DataFrame;
|
|
11
|
+
let testHelmWithEmptyRowsTableView: DG.TableView;
|
|
9
12
|
|
|
10
13
|
before(async () => {
|
|
11
14
|
testFastaDf = await readDataframe('samples/sample_FASTA.csv');
|
|
12
|
-
|
|
15
|
+
testFastaTableView = grok.shell.addTableView(testFastaDf);
|
|
16
|
+
testHelmWithEmptyRows = await readDataframe('samples/sample_HELM_empty_vals.csv');
|
|
17
|
+
testHelmWithEmptyRowsTableView = grok.shell.addTableView(testHelmWithEmptyRows);
|
|
13
18
|
});
|
|
14
19
|
|
|
15
20
|
after(async () => {
|
|
16
21
|
grok.shell.closeTable(testFastaDf);
|
|
22
|
+
testFastaTableView.close();
|
|
23
|
+
grok.shell.closeTable(testHelmWithEmptyRows);
|
|
24
|
+
testHelmWithEmptyRowsTableView.close();
|
|
17
25
|
});
|
|
18
26
|
|
|
19
27
|
test('sequenceSpaceOpens', async () => {
|
|
20
|
-
|
|
21
|
-
seqCol: testFastaDf.col('Sequence')!,
|
|
22
|
-
methodName: 't-SNE',
|
|
23
|
-
similarityMetric: 'Levenshtein',
|
|
24
|
-
embedAxesNames: ['Embed_X', 'Embed_Y']
|
|
25
|
-
};
|
|
26
|
-
const res = await sequenceSpace(sequenceSpaceParams);
|
|
27
|
-
expect(res.coordinates != undefined, true);
|
|
28
|
-
expect(res.distance != undefined, true);
|
|
28
|
+
await _testSequenceSpaceReturnsResult(testFastaDf, 'UMAP', 'Sequence');
|
|
29
29
|
});
|
|
30
|
+
|
|
31
|
+
test('sequenceSpaceOpensWithEmptyRows', async () => {
|
|
32
|
+
await _testSequenceSpaceReturnsResult(testHelmWithEmptyRows, 'UMAP', 'HELM');
|
|
33
|
+
});
|
|
34
|
+
|
|
30
35
|
});
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
import * as DG from 'datagrok-api/dg';
|
|
2
|
+
import * as grok from 'datagrok-api/grok';
|
|
3
|
+
import { expect } from '@datagrok-libraries/utils/src/test';
|
|
4
|
+
import { sequenceSpaceTopMenu } from '../package';
|
|
5
|
+
|
|
6
|
+
export async function _testSequenceSpaceReturnsResult(df: DG.DataFrame, algorithm: string, colName: string) {
|
|
7
|
+
await grok.data.detectSemanticTypes(df);
|
|
8
|
+
const sp = await sequenceSpaceTopMenu(df, df.col(colName)!, algorithm, 'Levenshtein', true);
|
|
9
|
+
expect(sp != null, true);
|
|
10
|
+
}
|