@datagrok/bio 1.7.22 → 1.7.25
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/detectors.js +6 -3
- package/dist/package-test.js +518 -198
- package/dist/package.js +274 -174
- package/package.json +13 -6
- package/{setup-unlink-clean → setup-unlink-clean.sh} +0 -0
- package/{setup → setup.sh} +1 -1
- package/src/__jest__/remote.test.ts +4 -4
- package/src/__jest__/test-node.ts +3 -2
- package/src/package-test.ts +1 -1
- package/src/package.ts +38 -34
- package/src/tests/WebLogo-positions-test.ts +102 -8
- package/src/tests/WebLogo-test.ts +1 -0
- package/src/tests/detectors-test.ts +21 -18
- package/src/tests/fasta-handler-test.ts +141 -0
- package/src/tests/renderers-test.ts +6 -2
- package/src/utils/cell-renderer.ts +103 -113
- package/src/utils/constants.ts +3 -4
- package/src/utils/convert.ts +12 -2
- package/src/utils/sequence-activity-cliffs.ts +1 -1
- package/src/viewers/vd-regions-viewer.ts +17 -0
- package/test-Bio-4f0c8bae6479-17115d45.html +358 -0
- package/test-Bio-4f0c8bae6479-dd77efbc.html +0 -348
package/package.json
CHANGED
|
@@ -1,8 +1,11 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@datagrok/bio",
|
|
3
|
-
"beta": false,
|
|
4
3
|
"friendlyName": "Bio",
|
|
5
|
-
"version": "1.7.22",
|
|
4
|
+
"author": {
|
|
5
|
+
"name": "Leonid Stolbov",
|
|
6
|
+
"email": "lstolbov@datagrok.ai"
|
|
7
|
+
},
|
|
8
|
+
"version": "1.7.25",
|
|
6
9
|
"description": "Bio is a [package](https://datagrok.ai/help/develop/develop#packages) for the [Datagrok](https://datagrok.ai) platform",
|
|
7
10
|
"repository": {
|
|
8
11
|
"type": "git",
|
|
@@ -11,9 +14,9 @@
|
|
|
11
14
|
},
|
|
12
15
|
"dependencies": {
|
|
13
16
|
"@biowasm/aioli": ">=2.4.0",
|
|
14
|
-
"@datagrok-libraries/bio": "^3.0.
|
|
15
|
-
"@datagrok-libraries/ml": "^3.0.
|
|
16
|
-
"@datagrok-libraries/utils": "^1.4
|
|
17
|
+
"@datagrok-libraries/bio": "^3.0.4",
|
|
18
|
+
"@datagrok-libraries/ml": "^3.0.3",
|
|
19
|
+
"@datagrok-libraries/utils": "^1.5.4",
|
|
17
20
|
"cash-dom": "latest",
|
|
18
21
|
"datagrok-api": "^1.5.0",
|
|
19
22
|
"dayjs": "^1.11.4",
|
|
@@ -33,7 +36,11 @@
|
|
|
33
36
|
"puppeteer": "^13.7.0",
|
|
34
37
|
"ts-jest": "^27.0.0",
|
|
35
38
|
"webpack": "latest",
|
|
36
|
-
"webpack-cli": "^4.10.0"
|
|
39
|
+
"webpack-cli": "^4.10.0",
|
|
40
|
+
"@types/js-yaml": "^4.0.5",
|
|
41
|
+
"js-yaml": "^4.1.0",
|
|
42
|
+
"@types/node-fetch": "^2.6.2",
|
|
43
|
+
"node-fetch": "^2.6.7"
|
|
37
44
|
},
|
|
38
45
|
"scripts": {
|
|
39
46
|
"link-api": "npm link datagrok-api",
|
|
File without changes
|
package/{setup → setup.sh}
RENAMED
|
package/src/__jest__/remote.test.ts
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
import * as utils from './test-node';
|
|
6
6
|
import puppeteer from 'puppeteer';
|
|
7
7
|
|
|
8
|
-
const P_START_TIMEOUT: number =
|
|
8
|
+
const P_START_TIMEOUT: number = 3600000;
|
|
9
9
|
let browser: puppeteer.Browser;
|
|
10
10
|
let page: puppeteer.Page;
|
|
11
11
|
|
|
@@ -51,10 +51,10 @@ it('TEST', async () => {
|
|
|
51
51
|
let failReport = '';
|
|
52
52
|
for (let i = 0; i < df.rowCount; i++) {
|
|
53
53
|
if (cStatus.get(i)) {
|
|
54
|
-
passReport += `Test result : ${targetPackage}.${cCat.get(i)}.${cName.get(i)} : ${cMessage.get(i)}\n`;
|
|
54
|
+
passReport += `Test result : Success : ${targetPackage}.${cCat.get(i)}.${cName.get(i)} : ${cMessage.get(i)}\n`;
|
|
55
55
|
} else {
|
|
56
56
|
failed = true;
|
|
57
|
-
failReport += `Test result : ${targetPackage}.${cCat.get(i)}.${cName.get(i)} : ${cMessage.get(i)}\n`;
|
|
57
|
+
failReport += `Test result : Failed : ${targetPackage}.${cCat.get(i)}.${cName.get(i)} : ${cMessage.get(i)}\n`;
|
|
58
58
|
}
|
|
59
59
|
}
|
|
60
60
|
resolve({failReport, passReport, failed});
|
|
@@ -65,4 +65,4 @@ it('TEST', async () => {
|
|
|
65
65
|
console.log(r.passReport);
|
|
66
66
|
// @ts-ignore
|
|
67
67
|
expect(r.failed).checkOutput(false, r.failReport);
|
|
68
|
-
},
|
|
68
|
+
}, 3600000);
|
|
package/src/__jest__/test-node.ts
CHANGED
|
@@ -68,6 +68,7 @@ export async function getBrowserPage(puppeteer: any): Promise<{ browser: any, pa
|
|
|
68
68
|
});
|
|
69
69
|
|
|
70
70
|
const page = await browser.newPage();
|
|
71
|
+
await page.setDefaultNavigationTimeout(0);
|
|
71
72
|
await page.goto(`${url}/oauth/`);
|
|
72
73
|
await page.setCookie({name: 'auth', value: token});
|
|
73
74
|
await page.evaluate((token: any) => {
|
|
@@ -75,8 +76,8 @@ export async function getBrowserPage(puppeteer: any): Promise<{ browser: any, pa
|
|
|
75
76
|
}, token);
|
|
76
77
|
await page.goto(url);
|
|
77
78
|
try {
|
|
78
|
-
await page.waitForSelector('.grok-preloader');
|
|
79
|
-
await page.waitForFunction(() => document.querySelector('.grok-preloader') == null, {timeout:
|
|
79
|
+
await page.waitForSelector('.grok-preloader', { timeout: 1800000 });
|
|
80
|
+
await page.waitForFunction(() => document.querySelector('.grok-preloader') == null, {timeout: 3600000});
|
|
80
81
|
} catch (error) {
|
|
81
82
|
throw error;
|
|
82
83
|
}
|
package/src/package-test.ts
CHANGED
|
@@ -11,6 +11,7 @@ import './tests/activity-cliffs-tests';
|
|
|
11
11
|
import './tests/splitters-test';
|
|
12
12
|
import './tests/renderers-test';
|
|
13
13
|
import './tests/convert-test';
|
|
14
|
+
import './tests/fasta-handler-test';
|
|
14
15
|
import './tests/WebLogo-positions-test';
|
|
15
16
|
|
|
16
17
|
export const _package = new DG.Package();
|
|
@@ -21,7 +22,6 @@ export {tests};
|
|
|
21
22
|
//input: string category {optional: true}
|
|
22
23
|
//input: string test {optional: true}
|
|
23
24
|
//output: dataframe result
|
|
24
|
-
//top-menu: Tools | Dev | JS API Tests
|
|
25
25
|
export async function test(category: string, test: string): Promise<DG.DataFrame> {
|
|
26
26
|
const data = await runTests({category, test});
|
|
27
27
|
return DG.DataFrame.fromObjects(data)!;
|
package/src/package.ts
CHANGED
|
@@ -5,7 +5,7 @@ import * as DG from 'datagrok-api/dg';
|
|
|
5
5
|
|
|
6
6
|
export const _package = new DG.Package();
|
|
7
7
|
|
|
8
|
-
import {
|
|
8
|
+
import {MacromoleculeDifferenceCellRenderer, MonomerCellRenderer} from './utils/cell-renderer';
|
|
9
9
|
import {WebLogo, SeqColStats} from '@datagrok-libraries/bio/src/viewers/web-logo';
|
|
10
10
|
import {VdRegionsViewer} from './viewers/vd-regions-viewer';
|
|
11
11
|
import {runKalign, testMSAEnoughMemory} from './utils/multiple-sequence-alignment';
|
|
@@ -20,37 +20,27 @@ import {createJsonMonomerLibFromSdf, encodeMonomers, getMolfilesFromSeq, HELM_CO
|
|
|
20
20
|
import {getMacroMol} from './utils/atomic-works';
|
|
21
21
|
import {MacromoleculeSequenceCellRenderer} from './utils/cell-renderer';
|
|
22
22
|
import {convert} from './utils/convert';
|
|
23
|
-
import {lru} from './utils/cell-renderer';
|
|
24
23
|
import {representationsWidget} from './widgets/representations';
|
|
25
24
|
import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
26
25
|
import {FastaFileHandler} from '@datagrok-libraries/bio/src/utils/fasta-handler';
|
|
26
|
+
import {removeEmptyStringRows} from '@datagrok-libraries/utils/src/dataframe-utils'
|
|
27
27
|
|
|
28
28
|
|
|
29
|
-
//
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
return new Promise((resolve, reject) => {
|
|
38
|
-
// @ts-ignore
|
|
39
|
-
dojo.ready(function() { resolve(null); });
|
|
40
|
-
});
|
|
41
|
-
}
|
|
42
|
-
|
|
43
|
-
//name: Lru
|
|
44
|
-
//output: object lruCache
|
|
45
|
-
export function Lru() {
|
|
46
|
-
return lru;
|
|
29
|
+
//name: fastaSequenceCellRenderer
|
|
30
|
+
//tags: cellRenderer
|
|
31
|
+
//meta.cellType: Sequence
|
|
32
|
+
//meta.columnTags: units=fasta
|
|
33
|
+
//output: grid_cell_renderer result
|
|
34
|
+
export function fastaSequenceCellRenderer(): MacromoleculeSequenceCellRenderer {
|
|
35
|
+
return new MacromoleculeSequenceCellRenderer();
|
|
47
36
|
}
|
|
48
37
|
|
|
49
|
-
//name:
|
|
38
|
+
//name: separatorSequenceCellRenderer
|
|
50
39
|
//tags: cellRenderer
|
|
51
|
-
//meta.cellType:
|
|
40
|
+
//meta.cellType: Sequence
|
|
41
|
+
//meta.columnTags: units=separator
|
|
52
42
|
//output: grid_cell_renderer result
|
|
53
|
-
export function
|
|
43
|
+
export function separatorSequenceCellRenderer(): MacromoleculeSequenceCellRenderer {
|
|
54
44
|
return new MacromoleculeSequenceCellRenderer();
|
|
55
45
|
}
|
|
56
46
|
|
|
@@ -161,16 +151,22 @@ export async function sequenceSpaceTopMenu(table: DG.DataFrame, macroMolecule: D
|
|
|
161
151
|
if (!encodedCol)
|
|
162
152
|
return;
|
|
163
153
|
const embedColsNames = getEmbeddingColsNames(table);
|
|
154
|
+
const withoutEmptyValues = DG.DataFrame.fromColumns([macroMolecule]).clone();
|
|
155
|
+
const emptyValsIdxs = removeEmptyStringRows(withoutEmptyValues, encodedCol);
|
|
156
|
+
|
|
164
157
|
const chemSpaceParams = {
|
|
165
|
-
seqCol:
|
|
158
|
+
seqCol: withoutEmptyValues.col(macroMolecule.name)!,
|
|
166
159
|
methodName: methodName,
|
|
167
160
|
similarityMetric: similarityMetric,
|
|
168
161
|
embedAxesNames: embedColsNames
|
|
169
162
|
};
|
|
170
163
|
const sequenceSpaceRes = await sequenceSpace(chemSpaceParams);
|
|
171
164
|
const embeddings = sequenceSpaceRes.coordinates;
|
|
172
|
-
for (const col of embeddings)
|
|
173
|
-
|
|
165
|
+
for (const col of embeddings) {
|
|
166
|
+
const listValues = col.toList();
|
|
167
|
+
emptyValsIdxs.forEach((ind: number) => listValues.splice(ind, 0, null));
|
|
168
|
+
table.columns.add(DG.Column.fromFloat32Array(col.name, listValues));
|
|
169
|
+
}
|
|
174
170
|
if (plotEmbeddings) {
|
|
175
171
|
for (const v of grok.shell.views) {
|
|
176
172
|
if (v.name === table.name)
|
|
@@ -241,6 +237,14 @@ export async function multipleSequenceAlignmentAny(table: DG.DataFrame, col: DG.
|
|
|
241
237
|
return msaCol;
|
|
242
238
|
}
|
|
243
239
|
|
|
240
|
+
//name: Bio | MSA
|
|
241
|
+
//tags: bio, panel
|
|
242
|
+
//input: column sequence { semType: Macromolecule }
|
|
243
|
+
//output: column result
|
|
244
|
+
export async function panelMSA(col: DG.Column): Promise<DG.Column | null> {
|
|
245
|
+
return multipleSequenceAlignmentAny(col.dataFrame, col);
|
|
246
|
+
}
|
|
247
|
+
|
|
244
248
|
//name: Composition Analysis
|
|
245
249
|
//top-menu: Bio | Composition Analysis
|
|
246
250
|
//output: viewer result
|
|
@@ -334,20 +338,20 @@ export function convertPanel(col: DG.Column): void {
|
|
|
334
338
|
convert(col);
|
|
335
339
|
}
|
|
336
340
|
|
|
337
|
-
//name:
|
|
341
|
+
//name: monomerCellRenderer
|
|
338
342
|
//tags: cellRenderer
|
|
339
|
-
//meta.cellType:
|
|
343
|
+
//meta.cellType: Monomer
|
|
340
344
|
//output: grid_cell_renderer result
|
|
341
|
-
export function
|
|
342
|
-
return new
|
|
345
|
+
export function monomerCellRenderer(): MonomerCellRenderer {
|
|
346
|
+
return new MonomerCellRenderer();
|
|
343
347
|
}
|
|
344
348
|
|
|
345
|
-
//name:
|
|
349
|
+
//name: MacromoleculeDifferenceCellRenderer
|
|
346
350
|
//tags: cellRenderer
|
|
347
|
-
//meta.cellType:
|
|
351
|
+
//meta.cellType: MacromoleculeDifference
|
|
348
352
|
//output: grid_cell_renderer result
|
|
349
|
-
export function
|
|
350
|
-
return new
|
|
353
|
+
export function macromoleculeDifferenceCellRenderer(): MacromoleculeDifferenceCellRenderer {
|
|
354
|
+
return new MacromoleculeDifferenceCellRenderer();
|
|
351
355
|
}
|
|
352
356
|
|
|
353
357
|
//name: testDetectMacromolecule
|
|
package/src/tests/WebLogo-positions-test.ts
CHANGED
|
@@ -4,10 +4,12 @@ import * as grok from 'datagrok-api/grok';
|
|
|
4
4
|
import * as ui from 'datagrok-api/ui';
|
|
5
5
|
import * as DG from 'datagrok-api/dg';
|
|
6
6
|
import {PositionInfo, PositionMonomerInfo, WebLogo} from '@datagrok-libraries/bio/src/viewers/web-logo';
|
|
7
|
+
import {Column} from 'datagrok-api/dg';
|
|
7
8
|
|
|
8
9
|
category('WebLogo-positions', () => {
|
|
9
10
|
let tvList: DG.TableView[];
|
|
10
11
|
let dfList: DG.DataFrame[];
|
|
12
|
+
let currentView: DG.View;
|
|
11
13
|
|
|
12
14
|
const csvDf1 = `seq
|
|
13
15
|
ATC-G-TTGC--
|
|
@@ -17,22 +19,24 @@ category('WebLogo-positions', () => {
|
|
|
17
19
|
-TC-GCTTGC--`;
|
|
18
20
|
|
|
19
21
|
|
|
20
|
-
const resShrinkEmptyTailDf1: PositionInfo[] = [];
|
|
21
|
-
|
|
22
22
|
before(async () => {
|
|
23
23
|
tvList = [];
|
|
24
24
|
dfList = [];
|
|
25
|
+
currentView = grok.shell.tv;
|
|
25
26
|
});
|
|
26
27
|
|
|
27
28
|
after(async () => {
|
|
28
|
-
dfList.forEach((df: DG.DataFrame) => { grok.shell.closeTable(df);
|
|
29
|
+
dfList.forEach((df: DG.DataFrame) => { grok.shell.closeTable(df);});
|
|
29
30
|
tvList.forEach((tv: DG.TableView) => tv.close());
|
|
31
|
+
currentView = grok.shell.tv;
|
|
30
32
|
});
|
|
31
|
-
|
|
32
33
|
test('allPositions', async () => {
|
|
33
34
|
const df: DG.DataFrame = DG.DataFrame.fromCsv(csvDf1);
|
|
34
35
|
const tv: DG.TableView = grok.shell.addTableView(df);
|
|
35
36
|
|
|
37
|
+
df.getCol('seq').semType = 'Macromolecule';
|
|
38
|
+
df.getCol('seq').setTag('units', 'fasta:SEQ.MSA:DNA');
|
|
39
|
+
|
|
36
40
|
const wlViewer: WebLogo = await df.plot.fromType('WebLogo') as unknown as WebLogo;
|
|
37
41
|
tv.dockManager.dock(wlViewer.root, DG.DOCK_TYPE.DOWN);
|
|
38
42
|
|
|
@@ -55,12 +59,102 @@ category('WebLogo-positions', () => {
|
|
|
55
59
|
new PositionInfo('11', {'-': new PositionMonomerInfo(5)}),
|
|
56
60
|
new PositionInfo('12', {'-': new PositionMonomerInfo(5)})
|
|
57
61
|
];
|
|
58
|
-
|
|
62
|
+
|
|
63
|
+
expect(positions.length, resAllDf1.length);
|
|
64
|
+
|
|
65
|
+
for (let i = 0; i < positions.length; i++) {
|
|
66
|
+
expect(positions[i].name, resAllDf1[i].name);
|
|
67
|
+
for (const key in positions[i].freq) {
|
|
68
|
+
expect(positions[i].freq[key].count, resAllDf1[i].freq[key].count);
|
|
69
|
+
}
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
});
|
|
73
|
+
test('positions with shrinkEmptyTail option true (filterd)', async () => {
|
|
74
|
+
let csvDf2 = `seq
|
|
75
|
+
-TC-G-TTGC--
|
|
76
|
+
-TC-GCTTGC--
|
|
77
|
+
-T--C-GT-
|
|
78
|
+
-T--C-GT-
|
|
79
|
+
-T--C-GT-
|
|
80
|
+
-T--CCGT-`;
|
|
81
|
+
const df: DG.DataFrame = DG.DataFrame.fromCsv(csvDf2);
|
|
82
|
+
const tv: DG.TableView = grok.shell.addTableView(df);
|
|
83
|
+
|
|
84
|
+
df.getCol('seq').semType = 'Macromolecule';
|
|
85
|
+
df.getCol('seq').setTag('units', 'fasta:SEQ.MSA:DNA');
|
|
86
|
+
|
|
87
|
+
let seq: Column = df.getCol('seq');
|
|
88
|
+
df.filter.init((i) => {
|
|
89
|
+
return i > 2;
|
|
90
|
+
});
|
|
91
|
+
df.filter.fireChanged();
|
|
92
|
+
const wlViewer: WebLogo = await df.plot.fromType('WebLogo', {'shrinkEmptyTail': true}) as unknown as WebLogo;
|
|
93
|
+
|
|
94
|
+
tv.dockManager.dock(wlViewer.root, DG.DOCK_TYPE.DOWN);
|
|
95
|
+
|
|
96
|
+
tvList.push(tv);
|
|
97
|
+
dfList.push(df);
|
|
98
|
+
|
|
99
|
+
const positions: PositionInfo[] = wlViewer['positions'];
|
|
100
|
+
|
|
101
|
+
const resAllDf1: PositionInfo[] = [
|
|
102
|
+
new PositionInfo('1', {'-': new PositionMonomerInfo(3)}),
|
|
103
|
+
new PositionInfo('2', {'T': new PositionMonomerInfo(3)}),
|
|
104
|
+
new PositionInfo('3', {'-': new PositionMonomerInfo(3)}),
|
|
105
|
+
new PositionInfo('4', {'-': new PositionMonomerInfo(3)}),
|
|
106
|
+
new PositionInfo('5', {'C': new PositionMonomerInfo(3)}),
|
|
107
|
+
new PositionInfo('6', {'-': new PositionMonomerInfo(2), 'C': new PositionMonomerInfo(1)}),
|
|
108
|
+
new PositionInfo('7', {'G': new PositionMonomerInfo(3)}),
|
|
109
|
+
new PositionInfo('8', {'T': new PositionMonomerInfo(3)}),
|
|
110
|
+
new PositionInfo('9', {'-': new PositionMonomerInfo(3)}),
|
|
111
|
+
];
|
|
112
|
+
|
|
113
|
+
expect(positions.length, resAllDf1.length);
|
|
114
|
+
|
|
115
|
+
for (let i = 0; i < positions.length; i++) {
|
|
116
|
+
expect(positions[i].name, resAllDf1[i].name);
|
|
117
|
+
for (const key in positions[i].freq) {
|
|
118
|
+
expect(positions[i].freq[key].count, resAllDf1[i].freq[key].count);
|
|
119
|
+
}
|
|
120
|
+
}
|
|
121
|
+
|
|
122
|
+
});
|
|
123
|
+
|
|
124
|
+
test('positions with skipEmptyPositions option', async () => {
|
|
125
|
+
const df: DG.DataFrame = DG.DataFrame.fromCsv(csvDf1);
|
|
126
|
+
const tv: DG.TableView = grok.shell.addTableView(df);
|
|
127
|
+
|
|
128
|
+
df.getCol('seq').semType = 'Macromolecule';
|
|
129
|
+
df.getCol('seq').setTag('units', 'fasta:SEQ.MSA:DNA');
|
|
130
|
+
|
|
131
|
+
const wlViewer: WebLogo = await df.plot.fromType('WebLogo', {'skipEmptyPositions': true}) as unknown as WebLogo;
|
|
132
|
+
tv.dockManager.dock(wlViewer.root, DG.DOCK_TYPE.DOWN);
|
|
133
|
+
|
|
134
|
+
tvList.push(tv);
|
|
135
|
+
dfList.push(df);
|
|
136
|
+
|
|
137
|
+
const positions: PositionInfo[] = wlViewer['positions'];
|
|
138
|
+
|
|
139
|
+
const resAllDf1: PositionInfo[] = [
|
|
140
|
+
new PositionInfo('1', {'A': new PositionMonomerInfo(2), '-': new PositionMonomerInfo(3)}),
|
|
141
|
+
new PositionInfo('2', {'T': new PositionMonomerInfo(5)}),
|
|
142
|
+
new PositionInfo('3', {'C': new PositionMonomerInfo(5)}),
|
|
143
|
+
new PositionInfo('5', {'G': new PositionMonomerInfo(5)}),
|
|
144
|
+
new PositionInfo('6', {'-': new PositionMonomerInfo(3), 'C': new PositionMonomerInfo(2)}),
|
|
145
|
+
new PositionInfo('7', {'T': new PositionMonomerInfo(5)}),
|
|
146
|
+
new PositionInfo('8', {'T': new PositionMonomerInfo(5)}),
|
|
147
|
+
new PositionInfo('9', {'G': new PositionMonomerInfo(5)}),
|
|
148
|
+
new PositionInfo('10', {'C': new PositionMonomerInfo(5)})
|
|
149
|
+
];
|
|
150
|
+
|
|
151
|
+
expect(positions.length, resAllDf1.length);
|
|
152
|
+
|
|
59
153
|
for (let i = 0; i < positions.length; i++) {
|
|
60
154
|
expect(positions[i].name, resAllDf1[i].name);
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
155
|
+
for (const key in positions[i].freq) {
|
|
156
|
+
expect(positions[i].freq[key].count, resAllDf1[i].freq[key].count);
|
|
157
|
+
}
|
|
64
158
|
}
|
|
65
159
|
|
|
66
160
|
});
|
|
package/src/tests/detectors-test.ts
CHANGED
|
@@ -190,6 +190,7 @@ MWRSWY-CKHP
|
|
|
190
190
|
};
|
|
191
191
|
};
|
|
192
192
|
|
|
193
|
+
|
|
193
194
|
test('NegativeEmpty', async () => { await _testNeg(readCsv('csvDfEmpty', csvDfEmpty), 'col1'); });
|
|
194
195
|
test('Negative1', async () => { await _testNeg(readCsv('csvDf1', csvDf1), 'col1'); });
|
|
195
196
|
test('Negative2', async () => { await _testNeg(readCsv('csvDf2', csvDf2), 'col1'); });
|
|
@@ -197,44 +198,44 @@ MWRSWY-CKHP
|
|
|
197
198
|
test('NegativeSmiles', async () => { await _testNeg(readCsv('csvDfSmiles', csvDfSmiles), 'col1'); });
|
|
198
199
|
|
|
199
200
|
test('Dna1', async () => {
|
|
200
|
-
await _testPos(readCsv('csvDfDna1', csvDfDna1), 'seq', 'fasta
|
|
201
|
+
await _testPos(readCsv('csvDfDna1', csvDfDna1), 'seq', 'fasta', 'SEQ', 'DNA');
|
|
201
202
|
});
|
|
202
203
|
test('Rna1', async () => {
|
|
203
|
-
await _testPos(readCsv('csvDfRna1', csvDfRna1), 'seq', 'fasta
|
|
204
|
+
await _testPos(readCsv('csvDfRna1', csvDfRna1), 'seq', 'fasta', 'SEQ', 'RNA');
|
|
204
205
|
});
|
|
205
206
|
test('AA1', async () => {
|
|
206
|
-
await _testPos(readCsv('csvDfPt1', csvDfPt1), 'seq', 'fasta
|
|
207
|
+
await _testPos(readCsv('csvDfPt1', csvDfPt1), 'seq', 'fasta', 'SEQ', 'PT');
|
|
207
208
|
});
|
|
208
209
|
test('MsaDna1', async () => {
|
|
209
|
-
await _testPos(readCsv('csvDfMsaDna1', csvDfMsaDna1), 'seq', 'fasta
|
|
210
|
+
await _testPos(readCsv('csvDfMsaDna1', csvDfMsaDna1), 'seq', 'fasta', 'SEQ.MSA', 'DNA');
|
|
210
211
|
});
|
|
211
212
|
|
|
212
213
|
test('MsaAA1', async () => {
|
|
213
|
-
await _testPos(readCsv('csvDfMsaPt1', csvDfMsaPt1), 'seq', 'fasta
|
|
214
|
+
await _testPos(readCsv('csvDfMsaPt1', csvDfMsaPt1), 'seq', 'fasta', 'SEQ.MSA', 'PT');
|
|
214
215
|
});
|
|
215
216
|
|
|
216
217
|
test('SepDna', async () => {
|
|
217
|
-
await _testPos(readCsv('csvDfSepDna', csvDfSepDna), 'seq', 'separator
|
|
218
|
+
await _testPos(readCsv('csvDfSepDna', csvDfSepDna), 'seq', 'separator', 'SEQ', 'DNA', '*');
|
|
218
219
|
});
|
|
219
220
|
test('SepRna', async () => {
|
|
220
|
-
await _testPos(readCsv('csvDfSepRna', csvDfSepRna), 'seq', 'separator
|
|
221
|
+
await _testPos(readCsv('csvDfSepRna', csvDfSepRna), 'seq', 'separator', 'SEQ', 'RNA', '*');
|
|
221
222
|
});
|
|
222
223
|
test('SepPt', async () => {
|
|
223
|
-
await _testPos(readCsv('csvDfSepPt', csvDfSepPt), 'seq', 'separator
|
|
224
|
+
await _testPos(readCsv('csvDfSepPt', csvDfSepPt), 'seq', 'separator', 'SEQ', 'PT', '-');
|
|
224
225
|
});
|
|
225
226
|
test('SepUn1', async () => {
|
|
226
|
-
await _testPos(readCsv('csvDfSepUn1', csvDfSepUn1), 'seq', 'separator
|
|
227
|
+
await _testPos(readCsv('csvDfSepUn1', csvDfSepUn1), 'seq', 'separator', 'SEQ', 'UN', '-');
|
|
227
228
|
});
|
|
228
229
|
test('SepUn2', async () => {
|
|
229
|
-
await _testPos(readCsv('csvDfSepUn2', csvDfSepUn2), 'seq', 'separator
|
|
230
|
+
await _testPos(readCsv('csvDfSepUn2', csvDfSepUn2), 'seq', 'separator', 'SEQ', 'UN', '/');
|
|
230
231
|
});
|
|
231
232
|
|
|
232
233
|
test('SepMsaN1', async () => {
|
|
233
|
-
await _testPos(readCsv('csvDfSepMsaDna1', csvDfSepMsaDna1), 'seq', 'separator
|
|
234
|
+
await _testPos(readCsv('csvDfSepMsaDna1', csvDfSepMsaDna1), 'seq', 'separator', 'SEQ.MSA', 'DNA', '-');
|
|
234
235
|
});
|
|
235
236
|
|
|
236
237
|
test('SamplesFastaCsvPt', async () => {
|
|
237
|
-
await _testPos(readSamples(Samples.fastaCsv), 'sequence', 'fasta
|
|
238
|
+
await _testPos(readSamples(Samples.fastaCsv), 'sequence', 'fasta', 'SEQ', 'PT');
|
|
238
239
|
});
|
|
239
240
|
test('SamplesFastaCsvNegativeEntry', async () => {
|
|
240
241
|
await _testNeg(readSamples(Samples.fastaCsv), 'Entry');
|
|
@@ -247,7 +248,7 @@ MWRSWY-CKHP
|
|
|
247
248
|
});
|
|
248
249
|
|
|
249
250
|
test('SamplesFastaFastaPt', async () => {
|
|
250
|
-
await _testPos(readSamples(Samples.fastaFasta, readFileFasta), 'sequence', 'fasta
|
|
251
|
+
await _testPos(readSamples(Samples.fastaFasta, readFileFasta), 'sequence', 'fasta', 'SEQ', 'PT');
|
|
251
252
|
});
|
|
252
253
|
|
|
253
254
|
// peptidesComplex contains monomers with spaces in AlignedSequence columns, which are forbidden
|
|
@@ -265,7 +266,7 @@ MWRSWY-CKHP
|
|
|
265
266
|
});
|
|
266
267
|
|
|
267
268
|
test('samplesMsaComplexUn', async () => {
|
|
268
|
-
await _testPos(readSamples(Samples.msaComplex), 'MSA', 'separator
|
|
269
|
+
await _testPos(readSamples(Samples.msaComplex), 'MSA', 'separator', 'SEQ.MSA', 'UN', '/');
|
|
269
270
|
});
|
|
270
271
|
test('samplesMsaComplexNegativeActivity', async () => {
|
|
271
272
|
await _testNeg(readSamples(Samples.msaComplex), 'Activity');
|
|
@@ -280,7 +281,7 @@ MWRSWY-CKHP
|
|
|
280
281
|
});
|
|
281
282
|
|
|
282
283
|
test('samplesHelmCsvHELM', async () => {
|
|
283
|
-
await _testPos(readSamples(Samples.helmCsv), 'HELM', 'HELM', null);
|
|
284
|
+
await _testPos(readSamples(Samples.helmCsv), 'HELM', 'HELM', null, null, null);
|
|
284
285
|
});
|
|
285
286
|
|
|
286
287
|
test('samplesHelmCsvNegativeActivity', async () => {
|
|
@@ -296,7 +297,7 @@ MWRSWY-CKHP
|
|
|
296
297
|
await _testNeg(readSamples(Samples.testHelmCsv), 'Test type');
|
|
297
298
|
});
|
|
298
299
|
test('samplesTestHelmPositiveHelmString', async () => {
|
|
299
|
-
await _testPos(readSamples(Samples.testHelmCsv), 'HELM string', 'HELM');
|
|
300
|
+
await _testPos(readSamples(Samples.testHelmCsv), 'HELM string', 'HELM', null, null, null);
|
|
300
301
|
});
|
|
301
302
|
test('samplesTestHelmNegativeValid', async () => {
|
|
302
303
|
await _testNeg(readSamples(Samples.testHelmCsv), 'Valid?');
|
|
@@ -328,7 +329,7 @@ MWRSWY-CKHP
|
|
|
328
329
|
});
|
|
329
330
|
|
|
330
331
|
test('samplesFastaPtPosSequence', async () => {
|
|
331
|
-
await _testPos(readSamples(Samples.fastaPtCsv), 'sequence', 'fasta
|
|
332
|
+
await _testPos(readSamples(Samples.fastaPtCsv), 'sequence', 'fasta', 'SEQ', 'PT');
|
|
332
333
|
});
|
|
333
334
|
|
|
334
335
|
test('samplesTestCerealNegativeCerealName', async () => {
|
|
@@ -383,7 +384,7 @@ export async function _testNeg(readDf: DfReaderFunc, colName: string) {
|
|
|
383
384
|
}
|
|
384
385
|
}
|
|
385
386
|
|
|
386
|
-
export async function _testPos(readDf: DfReaderFunc, colName: string, units: string, separator: string | null = null) {
|
|
387
|
+
export async function _testPos(readDf: DfReaderFunc, colName: string, units: string, aligned: string | null, alphabet: string | null, separator: string | null = null) {
|
|
387
388
|
const df: DG.DataFrame = await readDf();
|
|
388
389
|
const col: DG.Column = df.col(colName)!;
|
|
389
390
|
const semType: string = await grok.functions.call('Bio:detectMacromolecule', {col: col});
|
|
@@ -392,6 +393,8 @@ export async function _testPos(readDf: DfReaderFunc, colName: string, units: str
|
|
|
392
393
|
|
|
393
394
|
expect(col.semType === DG.SEMTYPE.MACROMOLECULE, true);
|
|
394
395
|
expect(col.getTag(DG.TAGS.UNITS), units);
|
|
396
|
+
expect(col.getTag('aligned'), aligned);
|
|
397
|
+
expect(col.getTag('alphabet'), alphabet);
|
|
395
398
|
if (separator)
|
|
396
399
|
expect(col.getTag('separator'), separator);
|
|
397
400
|
}
|
|
package/src/tests/fasta-handler-test.ts
ADDED
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
/* Do not change these import lines to match external modules in webpack configuration */
|
|
2
|
+
import * as grok from 'datagrok-api/grok';
|
|
3
|
+
import * as ui from 'datagrok-api/ui';
|
|
4
|
+
import * as DG from 'datagrok-api/dg';
|
|
5
|
+
|
|
6
|
+
import {category, expectArray, test} from '@datagrok-libraries/utils/src/test';
|
|
7
|
+
import {FastaFileHandler} from '@datagrok-libraries/bio/src/utils/fasta-handler';
|
|
8
|
+
import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
category('fastaFileHandler', () => {
|
|
12
|
+
const fastaNormalFormatting = `>description:1
|
|
13
|
+
MDYKETLLMPKTDFPMRGGLPNKEPQIQEKW
|
|
14
|
+
|
|
15
|
+
>description:2
|
|
16
|
+
MIEVFLFGIVLGLIPITLAGLFVTAYLQYRRGDQLDL
|
|
17
|
+
|
|
18
|
+
>description:3
|
|
19
|
+
MMELVLKTIIGPIVVGVVLRIVDKWLNKDK
|
|
20
|
+
|
|
21
|
+
>description:4
|
|
22
|
+
MDRTDEVSNHTHDKPTLTWFEEIFEEYHSPFHN
|
|
23
|
+
`;
|
|
24
|
+
|
|
25
|
+
const fastaExtraSpaces = `>description:1
|
|
26
|
+
MDYKETLLMPKTDFPMRGGLPNKEPQIQEKW
|
|
27
|
+
|
|
28
|
+
>description:2
|
|
29
|
+
MI EVF LFGIVLGLI PITLAGLFVTAY LQYRRGDQLDL
|
|
30
|
+
|
|
31
|
+
>description:3
|
|
32
|
+
M MELVLKTI IGPI VVGVVLR IVDKWLNKDK
|
|
33
|
+
|
|
34
|
+
>description:4
|
|
35
|
+
MDR TDEVSNHTHDKP TLTWFEEIFEEYHSPFHN
|
|
36
|
+
`;
|
|
37
|
+
|
|
38
|
+
const fastaExtraNewlines = `>description:1
|
|
39
|
+
|
|
40
|
+
MDYKETLLMPKTDFPMRGGLPNKEPQIQEKW
|
|
41
|
+
|
|
42
|
+
>description:2
|
|
43
|
+
MIEVF
|
|
44
|
+
LFGIVLGLI
|
|
45
|
+
PITLAGLFVTA
|
|
46
|
+
YLQYRRGDQLDL
|
|
47
|
+
|
|
48
|
+
>description:3
|
|
49
|
+
M
|
|
50
|
+
ME
|
|
51
|
+
|
|
52
|
+
LVLKTIIG
|
|
53
|
+
|
|
54
|
+
PIVVGVVLRI
|
|
55
|
+
VDKWLNKDK
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
>description:4
|
|
59
|
+
|
|
60
|
+
MDRT
|
|
61
|
+
|
|
62
|
+
DEVSNHTHDKP
|
|
63
|
+
|
|
64
|
+
TLTWFEEIFEE
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
YHSPFHN
|
|
69
|
+
`;
|
|
70
|
+
// a "broken" fasta file
|
|
71
|
+
// const fastaBroken = `
|
|
72
|
+
|
|
73
|
+
// >description:1
|
|
74
|
+
// MDYKETLLM
|
|
75
|
+
// PKTDFPMRGGLPN
|
|
76
|
+
// KEPQIQEKW
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
// >description:2
|
|
81
|
+
// MIEVFL FGIVLGLIPI TLAGLFVTAYLQYRRGDQLDL
|
|
82
|
+
|
|
83
|
+
// >description:3
|
|
84
|
+
|
|
85
|
+
// M
|
|
86
|
+
// MELVLKTIIGP
|
|
87
|
+
// IVVGVVLR
|
|
88
|
+
// IVDKWLNKD
|
|
89
|
+
|
|
90
|
+
// K
|
|
91
|
+
|
|
92
|
+
// >description:4
|
|
93
|
+
// MDRTDEV
|
|
94
|
+
|
|
95
|
+
// SNHTHDKP
|
|
96
|
+
// TLTWFEEI
|
|
97
|
+
// FEE
|
|
98
|
+
|
|
99
|
+
// YHSPFHN
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
// `;
|
|
103
|
+
|
|
104
|
+
const descriptionsArray = [
|
|
105
|
+
'description:1', 'description:2', 'description:3', 'description:4',
|
|
106
|
+
];
|
|
107
|
+
const descriptionCol = DG.Column.fromStrings('description', descriptionsArray);
|
|
108
|
+
|
|
109
|
+
const sequencesArray = [
|
|
110
|
+
'MDYKETLLMPKTDFPMRGGLPNKEPQIQEKW',
|
|
111
|
+
'MIEVFLFGIVLGLIPITLAGLFVTAYLQYRRGDQLDL',
|
|
112
|
+
'MMELVLKTIIGPIVVGVVLRIVDKWLNKDK',
|
|
113
|
+
'MDRTDEVSNHTHDKPTLTWFEEIFEEYHSPFHN',
|
|
114
|
+
];
|
|
115
|
+
const sequencesCol = DG.Column.fromStrings('sequence', sequencesArray);
|
|
116
|
+
sequencesCol.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
117
|
+
UnitsHandler.setUnitsToFastaColumn(sequencesCol);
|
|
118
|
+
|
|
119
|
+
const fastaDf = DG.DataFrame.fromColumns([descriptionCol, sequencesCol]);
|
|
120
|
+
|
|
121
|
+
function _testColumnsParser(inputFasta: string) {
|
|
122
|
+
const ffh = new FastaFileHandler(inputFasta);
|
|
123
|
+
const parsedDescriptionsArray = ffh.descriptionsArray;
|
|
124
|
+
const parsedSequencesArray = ffh.sequencesArray;
|
|
125
|
+
expectArray(
|
|
126
|
+
[parsedDescriptionsArray, parsedSequencesArray],
|
|
127
|
+
[descriptionsArray, sequencesArray]
|
|
128
|
+
);
|
|
129
|
+
}
|
|
130
|
+
|
|
131
|
+
// test parser
|
|
132
|
+
test('testNormalFormatting', async () => {
|
|
133
|
+
_testColumnsParser(fastaNormalFormatting);
|
|
134
|
+
});
|
|
135
|
+
test('testExtraSpaces', async () => {
|
|
136
|
+
_testColumnsParser(fastaExtraSpaces);
|
|
137
|
+
});
|
|
138
|
+
test('testExtraNewlines', async () => {
|
|
139
|
+
_testColumnsParser(fastaExtraNewlines);
|
|
140
|
+
});
|
|
141
|
+
});
|