@datagrok/bio 2.1.12 → 2.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +11 -12
- package/css/helm.css +10 -0
- package/detectors.js +83 -59
- package/dist/package-test.js +2 -13168
- package/dist/package-test.js.map +1 -0
- package/dist/package.js +2 -10560
- package/dist/package.js.map +1 -0
- package/dockerfiles/Dockerfile +86 -0
- package/files/icons/composition-analysis.svg +17 -0
- package/files/icons/sequence-diversity-viewer.svg +4 -0
- package/files/icons/sequence-similarity-viewer.svg +4 -0
- package/files/icons/vdregions-viewer.svg +22 -0
- package/files/icons/weblogo-viewer.svg +7 -0
- package/files/tests/testUrl.csv +11 -0
- package/files/tests/toAtomicLevelTest.csv +4 -0
- package/package.json +24 -25
- package/src/analysis/sequence-activity-cliffs.ts +11 -9
- package/src/analysis/sequence-search-base-viewer.ts +2 -1
- package/src/analysis/sequence-similarity-viewer.ts +3 -3
- package/src/analysis/sequence-space.ts +2 -1
- package/src/calculations/monomerLevelMols.ts +4 -4
- package/src/package-test.ts +9 -2
- package/src/package.ts +215 -131
- package/src/substructure-search/substructure-search.ts +19 -16
- package/src/tests/Palettes-test.ts +1 -1
- package/src/tests/WebLogo-positions-test.ts +113 -57
- package/src/tests/_first-tests.ts +9 -0
- package/src/tests/activity-cliffs-tests.ts +8 -7
- package/src/tests/activity-cliffs-utils.ts +17 -9
- package/src/tests/bio-tests.ts +4 -5
- package/src/tests/checkInputColumn-tests.ts +1 -1
- package/src/tests/converters-test.ts +52 -17
- package/src/tests/detectors-benchmark-tests.ts +3 -2
- package/src/tests/detectors-tests.ts +177 -172
- package/src/tests/fasta-export-tests.ts +1 -1
- package/src/tests/monomer-libraries-tests.ts +34 -0
- package/src/tests/pepsea-tests.ts +21 -0
- package/src/tests/renderers-test.ts +21 -19
- package/src/tests/sequence-space-test.ts +6 -4
- package/src/tests/similarity-diversity-tests.ts +4 -4
- package/src/tests/splitters-test.ts +4 -5
- package/src/tests/substructure-filters-tests.ts +23 -1
- package/src/tests/utils/sequences-generators.ts +1 -1
- package/src/tests/utils.ts +2 -1
- package/src/tests/viewers.ts +16 -0
- package/src/utils/cell-renderer.ts +88 -35
- package/src/utils/constants.ts +7 -6
- package/src/utils/convert.ts +8 -2
- package/src/utils/monomer-lib.ts +174 -0
- package/src/utils/multiple-sequence-alignment.ts +44 -20
- package/src/utils/pepsea.ts +78 -0
- package/src/utils/save-as-fasta.ts +2 -1
- package/src/utils/ui-utils.ts +15 -3
- package/src/viewers/vd-regions-viewer.ts +113 -72
- package/src/viewers/web-logo-viewer.ts +1031 -0
- package/src/widgets/bio-substructure-filter.ts +38 -24
- package/tsconfig.json +71 -72
- package/webpack.config.js +4 -11
- package/dist/vendors-node_modules_datagrok-libraries_ml_src_workers_dimensionality-reducer_js.js +0 -9039
|
@@ -1,16 +1,15 @@
|
|
|
1
1
|
import * as grok from 'datagrok-api/grok';
|
|
2
|
-
import * as ui from 'datagrok-api/ui';
|
|
3
2
|
import * as DG from 'datagrok-api/dg';
|
|
4
3
|
|
|
5
|
-
import {after, before, category,
|
|
4
|
+
import {after, before, category, expect, expectArray, test} from '@datagrok-libraries/utils/src/test';
|
|
5
|
+
import {ALPHABET, NOTATION, SplitterFunc, TAGS as bioTAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
6
6
|
import {
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
PositionInfo,
|
|
10
|
-
PositionMonomerInfo,
|
|
11
|
-
TAGS as bioTAGS,
|
|
7
|
+
countForMonomerAtPosition,
|
|
8
|
+
FilterSources,
|
|
9
|
+
PositionInfo as PI,
|
|
10
|
+
PositionMonomerInfo as PMI,
|
|
12
11
|
WebLogoViewer
|
|
13
|
-
} from '
|
|
12
|
+
} from '../viewers/web-logo-viewer';
|
|
14
13
|
|
|
15
14
|
category('WebLogo-positions', () => {
|
|
16
15
|
let tvList: DG.TableView[];
|
|
@@ -28,13 +27,14 @@ ATC-G-TTGC--
|
|
|
28
27
|
before(async () => {
|
|
29
28
|
tvList = [];
|
|
30
29
|
dfList = [];
|
|
31
|
-
currentView = grok.shell.v;
|
|
30
|
+
// currentView = grok.shell.v;
|
|
32
31
|
});
|
|
33
32
|
|
|
34
33
|
after(async () => {
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
34
|
+
// Closing opened views causes the error 'Cannot read properties of null (reading 'f')'
|
|
35
|
+
// dfList.forEach((df: DG.DataFrame) => { grok.shell.closeTable(df); });
|
|
36
|
+
// tvList.forEach((tv: DG.TableView) => tv.close());
|
|
37
|
+
// grok.shell.v = currentView;
|
|
38
38
|
});
|
|
39
39
|
|
|
40
40
|
test('allPositions', async () => {
|
|
@@ -53,21 +53,21 @@ ATC-G-TTGC--
|
|
|
53
53
|
tvList.push(tv);
|
|
54
54
|
dfList.push(df);
|
|
55
55
|
|
|
56
|
-
const positions:
|
|
57
|
-
|
|
58
|
-
const resAllDf1:
|
|
59
|
-
new
|
|
60
|
-
new
|
|
61
|
-
new
|
|
62
|
-
new
|
|
63
|
-
new
|
|
64
|
-
new
|
|
65
|
-
new
|
|
66
|
-
new
|
|
67
|
-
new
|
|
68
|
-
new
|
|
69
|
-
new
|
|
70
|
-
new
|
|
56
|
+
const positions: PI[] = wlViewer['positions'];
|
|
57
|
+
|
|
58
|
+
const resAllDf1: PI[] = [
|
|
59
|
+
new PI(0, '1', {'A': new PMI(2), '-': new PMI(3)}),
|
|
60
|
+
new PI(1, '2', {'T': new PMI(5)}),
|
|
61
|
+
new PI(2, '3', {'C': new PMI(5)}),
|
|
62
|
+
new PI(3, '4', {'-': new PMI(5)}),
|
|
63
|
+
new PI(4, '5', {'G': new PMI(5)}),
|
|
64
|
+
new PI(5, '6', {'-': new PMI(3), 'C': new PMI(2)}),
|
|
65
|
+
new PI(6, '7', {'T': new PMI(5)}),
|
|
66
|
+
new PI(7, '8', {'T': new PMI(5)}),
|
|
67
|
+
new PI(8, '9', {'G': new PMI(5)}),
|
|
68
|
+
new PI(9, '10', {'C': new PMI(5)}),
|
|
69
|
+
new PI(10, '11', {'-': new PMI(5)}),
|
|
70
|
+
new PI(11, '12', {'-': new PMI(5)}),
|
|
71
71
|
];
|
|
72
72
|
|
|
73
73
|
expect(positions.length, resAllDf1.length);
|
|
@@ -108,18 +108,18 @@ ATC-G-TTGC--
|
|
|
108
108
|
tvList.push(tv);
|
|
109
109
|
dfList.push(df);
|
|
110
110
|
|
|
111
|
-
const positions:
|
|
112
|
-
|
|
113
|
-
const resAllDf1:
|
|
114
|
-
new
|
|
115
|
-
new
|
|
116
|
-
new
|
|
117
|
-
new
|
|
118
|
-
new
|
|
119
|
-
new
|
|
120
|
-
new
|
|
121
|
-
new
|
|
122
|
-
new
|
|
111
|
+
const positions: PI[] = wlViewer['positions'];
|
|
112
|
+
|
|
113
|
+
const resAllDf1: PI[] = [
|
|
114
|
+
new PI(0, '1', {'-': new PMI(3)}),
|
|
115
|
+
new PI(1, '2', {'T': new PMI(3)}),
|
|
116
|
+
new PI(2, '3', {'-': new PMI(3)}),
|
|
117
|
+
new PI(3, '4', {'-': new PMI(3)}),
|
|
118
|
+
new PI(4, '5', {'C': new PMI(3)}),
|
|
119
|
+
new PI(5, '6', {'-': new PMI(2), 'C': new PMI(1)}),
|
|
120
|
+
new PI(6, '7', {'G': new PMI(3)}),
|
|
121
|
+
new PI(7, '8', {'T': new PMI(3)}),
|
|
122
|
+
new PI(8, '9', {'-': new PMI(3)}),
|
|
123
123
|
];
|
|
124
124
|
|
|
125
125
|
expect(positions.length, resAllDf1.length);
|
|
@@ -149,27 +149,83 @@ ATC-G-TTGC--
|
|
|
149
149
|
tvList.push(tv);
|
|
150
150
|
dfList.push(df);
|
|
151
151
|
|
|
152
|
-
const
|
|
153
|
-
|
|
154
|
-
const
|
|
155
|
-
new
|
|
156
|
-
new
|
|
157
|
-
new
|
|
158
|
-
new
|
|
159
|
-
new
|
|
160
|
-
new
|
|
161
|
-
new
|
|
162
|
-
new
|
|
163
|
-
new
|
|
152
|
+
const resPosList: PI[] = wlViewer['positions'];
|
|
153
|
+
|
|
154
|
+
const tgtPosList: PI[] = [
|
|
155
|
+
new PI(0, '1', {'A': new PMI(2), '-': new PMI(3)}),
|
|
156
|
+
new PI(1, '2', {'T': new PMI(5)}),
|
|
157
|
+
new PI(2, '3', {'C': new PMI(5)}),
|
|
158
|
+
new PI(4, '5', {'G': new PMI(5)}),
|
|
159
|
+
new PI(5, '6', {'-': new PMI(3), 'C': new PMI(2)}),
|
|
160
|
+
new PI(6, '7', {'T': new PMI(5)}),
|
|
161
|
+
new PI(7, '8', {'T': new PMI(5)}),
|
|
162
|
+
new PI(8, '9', {'G': new PMI(5)}),
|
|
163
|
+
new PI(9, '10', {'C': new PMI(5)})
|
|
164
164
|
];
|
|
165
165
|
|
|
166
|
-
expect(
|
|
166
|
+
expect(resPosList.length, tgtPosList.length);
|
|
167
|
+
for (let posI = 0; posI < resPosList.length; posI++) {
|
|
168
|
+
const resPos = resPosList[posI];
|
|
169
|
+
const tgtPos = tgtPosList[posI];
|
|
170
|
+
expectPositionInfo(resPos, tgtPos);
|
|
171
|
+
}
|
|
172
|
+
});
|
|
167
173
|
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
174
|
+
test('count sequences for monomer at position', async () => {
|
|
175
|
+
const df: DG.DataFrame = buildDfWithSeqCol(csvDf1, NOTATION.FASTA, ALPHABET.DNA, 'SEQ.MSA');
|
|
176
|
+
const seqCol: DG.Column = df.getCol('seq');
|
|
177
|
+
|
|
178
|
+
const tv: DG.TableView = grok.shell.addTableView(df);
|
|
179
|
+
|
|
180
|
+
const wlViewer: WebLogoViewer = (await df.plot.fromType('WebLogo', {
|
|
181
|
+
startPositionName: '3',
|
|
182
|
+
endPositionName: '7',
|
|
183
|
+
skipEmptyPositions: true
|
|
184
|
+
})) as WebLogoViewer;
|
|
185
|
+
tv.dockManager.dock(wlViewer.root, DG.DOCK_TYPE.DOWN);
|
|
186
|
+
|
|
187
|
+
tvList.push(tv);
|
|
188
|
+
dfList.push(df);
|
|
189
|
+
|
|
190
|
+
const resPosList: PI[] = wlViewer['positions'];
|
|
191
|
+
const tgtPosList: PI[] = [
|
|
192
|
+
new PI(2, '3', {'C': new PMI(5)}),
|
|
193
|
+
new PI(4, '5', {'G': new PMI(5)}),
|
|
194
|
+
new PI(5, '6', {'-': new PMI(3), 'C': new PMI(2)}),
|
|
195
|
+
new PI(6, '7', {'T': new PMI(5)}),
|
|
196
|
+
];
|
|
197
|
+
|
|
198
|
+
expect(resPosList.length, tgtPosList.length);
|
|
199
|
+
for (let posI = 0; posI < resPosList.length; posI++) {
|
|
200
|
+
const resPos = resPosList[posI];
|
|
201
|
+
const tgtPos = tgtPosList[posI];
|
|
202
|
+
expectPositionInfo(resPos, tgtPos);
|
|
173
203
|
}
|
|
204
|
+
|
|
205
|
+
const splitter: SplitterFunc = wlViewer['splitter']!;
|
|
206
|
+
const atPI1: PI = resPosList[1];
|
|
207
|
+
const countAt1 = countForMonomerAtPosition(df, seqCol, df.filter, splitter, 'G', atPI1);
|
|
208
|
+
expect(countAt1, 5);
|
|
174
209
|
});
|
|
175
210
|
});
|
|
211
|
+
|
|
212
|
+
function expectPositionInfo(actualPos: PI, expectedPos: PI): void {
|
|
213
|
+
expect(actualPos.name, expectedPos.name);
|
|
214
|
+
expectArray(Object.keys(actualPos.freq), Object.keys(expectedPos.freq));
|
|
215
|
+
for (const key in actualPos.freq) {
|
|
216
|
+
//
|
|
217
|
+
expect(actualPos.freq[key].count, expectedPos.freq[key].count);
|
|
218
|
+
}
|
|
219
|
+
}
|
|
220
|
+
|
|
221
|
+
function buildDfWithSeqCol(csv: string, notation: NOTATION, alphabet: ALPHABET, aligned: string): DG.DataFrame {
|
|
222
|
+
const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
|
|
223
|
+
|
|
224
|
+
const seqCol: DG.Column = df.getCol('seq');
|
|
225
|
+
seqCol.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
226
|
+
seqCol.setTag(DG.TAGS.UNITS, notation);
|
|
227
|
+
seqCol.setTag(bioTAGS.alphabet, alphabet);
|
|
228
|
+
seqCol.setTag(bioTAGS.aligned, aligned);
|
|
229
|
+
|
|
230
|
+
return df;
|
|
231
|
+
}
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
import * as DG from 'datagrok-api/dg';
|
|
2
|
+
|
|
3
|
+
import {category, expect, test} from '@datagrok-libraries/utils/src/test';
|
|
4
|
+
|
|
5
|
+
category('_first', () => {
|
|
6
|
+
/** The first test of the package to evaluate problems with imports. */
|
|
7
|
+
test('_first', async () => {
|
|
8
|
+
});
|
|
9
|
+
});
|
|
@@ -1,7 +1,8 @@
|
|
|
1
|
-
import {after, before, category, test} from '@datagrok-libraries/utils/src/test';
|
|
2
|
-
|
|
3
|
-
import * as DG from 'datagrok-api/dg';
|
|
4
1
|
import * as grok from 'datagrok-api/grok';
|
|
2
|
+
import * as ui from 'datagrok-api/ui';
|
|
3
|
+
import * as DG from 'datagrok-api/dg';
|
|
4
|
+
|
|
5
|
+
import {after, before, category, test} from '@datagrok-libraries/utils/src/test';
|
|
5
6
|
|
|
6
7
|
import {readDataframe} from './utils';
|
|
7
8
|
import {_testActivityCliffsOpen} from './activity-cliffs-utils';
|
|
@@ -22,8 +23,8 @@ category('activityCliffs', async () => {
|
|
|
22
23
|
});
|
|
23
24
|
|
|
24
25
|
after(async () => {
|
|
25
|
-
for (const
|
|
26
|
-
for (const
|
|
26
|
+
// for (const df of dfList) grok.shell.closeTable(df);
|
|
27
|
+
// for (const view of viewList) view.close();
|
|
27
28
|
});
|
|
28
29
|
|
|
29
30
|
test('activityCliffsOpens', async () => {
|
|
@@ -33,7 +34,7 @@ category('activityCliffs', async () => {
|
|
|
33
34
|
viewList.push(actCliffsTableView);
|
|
34
35
|
|
|
35
36
|
await _testActivityCliffsOpen(actCliffsDf, 57, 'UMAP', 'MSA');
|
|
36
|
-
});
|
|
37
|
+
}, {skipReason: 'GROK-12774'});
|
|
37
38
|
|
|
38
39
|
test('activityCliffsWithEmptyRows', async () => {
|
|
39
40
|
actCliffsDfWithEmptyRows = await readDataframe('tests/sample_MSA_data_empty_vals.csv');
|
|
@@ -42,5 +43,5 @@ category('activityCliffs', async () => {
|
|
|
42
43
|
viewList.push(actCliffsTableViewWithEmptyRows);
|
|
43
44
|
|
|
44
45
|
await _testActivityCliffsOpen(actCliffsDfWithEmptyRows, 57, 'UMAP', 'MSA');
|
|
45
|
-
});
|
|
46
|
+
}, {skipReason: 'GROK-12774'});
|
|
46
47
|
});
|
|
@@ -2,20 +2,28 @@ import * as DG from 'datagrok-api/dg';
|
|
|
2
2
|
import * as grok from 'datagrok-api/grok';
|
|
3
3
|
|
|
4
4
|
import {delay, expect} from '@datagrok-libraries/utils/src/test';
|
|
5
|
-
import {_package} from '../package-test';
|
|
6
5
|
import {activityCliffs} from '../package';
|
|
7
6
|
|
|
8
7
|
export async function _testActivityCliffsOpen(df: DG.DataFrame, numberCliffs: number, method: string, colName: string) {
|
|
9
8
|
await grok.data.detectSemanticTypes(df);
|
|
10
9
|
const scatterPlot = await activityCliffs(
|
|
11
|
-
df,
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
10
|
+
df, df.getCol(colName), df.getCol('Activity'),
|
|
11
|
+
50, method);
|
|
12
|
+
// const scatterPlot = (await grok.functions.call('Bio:activityCliffs', {
|
|
13
|
+
// table: df, molecules: df.getCol(colName), activities: df.getCol('Activity'),
|
|
14
|
+
// similarity: 50, methodName: method
|
|
15
|
+
// })) as DG.Viewer | undefined;
|
|
16
|
+
|
|
17
|
+
// test scatter plot without activityCliffs passed
|
|
18
|
+
// const scatterPlot = (await df.plot.fromType(DG.VIEWER.SCATTER_PLOT, {})) as DG.Viewer;
|
|
19
|
+
// const libHelper: IMonomerLibHelper = (await grok.functions.call('Bio:getMonomerLibHelper'));
|
|
20
|
+
// const k = 11;
|
|
16
21
|
|
|
17
22
|
expect(scatterPlot != null, true);
|
|
18
23
|
|
|
19
|
-
const cliffsLink = Array.from(scatterPlot!.root.children).
|
|
20
|
-
|
|
21
|
-
|
|
24
|
+
const cliffsLink = Array.from(scatterPlot!.root.children).find((el) => {
|
|
25
|
+
const classList: string[] = el.className.split(' ');
|
|
26
|
+
return ['ui-btn', 'ui-btn-ok'].every((reqClassName) => classList.includes(reqClassName));
|
|
27
|
+
});
|
|
28
|
+
expect((cliffsLink as HTMLElement).innerText.toLowerCase(), `${numberCliffs} cliffs`);
|
|
29
|
+
}
|
package/src/tests/bio-tests.ts
CHANGED
|
@@ -4,17 +4,16 @@ import * as DG from 'datagrok-api/dg';
|
|
|
4
4
|
|
|
5
5
|
import {after, before, category, test, expect, expectObject} from '@datagrok-libraries/utils/src/test';
|
|
6
6
|
import {
|
|
7
|
-
AminoacidsPalettes,
|
|
8
7
|
getAlphabetSimilarity,
|
|
9
8
|
getStats,
|
|
10
9
|
monomerToShort,
|
|
11
|
-
Nucleotides,
|
|
12
|
-
NucleotidesPalettes,
|
|
13
10
|
pickUpPalette,
|
|
14
11
|
splitterAsFasta,
|
|
15
12
|
splitterAsHelm,
|
|
16
|
-
|
|
17
|
-
} from '@datagrok-libraries/bio';
|
|
13
|
+
} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
14
|
+
import {Nucleotides, NucleotidesPalettes} from '@datagrok-libraries/bio/src/nucleotides';
|
|
15
|
+
import {AminoacidsPalettes} from '@datagrok-libraries/bio/src/aminoacids';
|
|
16
|
+
import {UnknownSeqPalette} from '@datagrok-libraries/bio/src/unknown';
|
|
18
17
|
|
|
19
18
|
category('bio', () => {
|
|
20
19
|
const csvDfN1: string = `seq
|
|
@@ -6,7 +6,7 @@ import * as DG from 'datagrok-api/dg';
|
|
|
6
6
|
import {after, before, category, test, expect, expectArray} from '@datagrok-libraries/utils/src/test';
|
|
7
7
|
|
|
8
8
|
import {checkInputColumn, multipleSequenceAlignmentAny} from '../package';
|
|
9
|
-
import {ALPHABET, NOTATION, TAGS as bioTAGS} from '@datagrok-libraries/bio';
|
|
9
|
+
import {ALPHABET, NOTATION, TAGS as bioTAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
10
10
|
|
|
11
11
|
category('checkInputColumn', () => {
|
|
12
12
|
const csv = `seq
|
|
@@ -1,11 +1,12 @@
|
|
|
1
1
|
import * as DG from 'datagrok-api/dg';
|
|
2
|
-
import * as ui from 'datagrok-api/ui';
|
|
3
2
|
import * as grok from 'datagrok-api/grok';
|
|
4
3
|
|
|
5
4
|
import {category, expect, expectArray, test} from '@datagrok-libraries/utils/src/test';
|
|
6
5
|
|
|
7
6
|
import {ConverterFunc} from './types';
|
|
8
|
-
import {NOTATION
|
|
7
|
+
import {NOTATION} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
8
|
+
import {NotationConverter} from '@datagrok-libraries/bio/src/utils/notation-converter';
|
|
9
|
+
import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
9
10
|
|
|
10
11
|
// import {mmSemType} from '../const';
|
|
11
12
|
// import {importFasta} from '../package';
|
|
@@ -28,6 +29,10 @@ category('converters', () => {
|
|
|
28
29
|
separatorGaps = 'separatorGaps',
|
|
29
30
|
helmGaps = 'helmGaps',
|
|
30
31
|
|
|
32
|
+
fastaUn = 'fastaUn',
|
|
33
|
+
separatorUn = 'separatorUn',
|
|
34
|
+
helmUn = 'helmUn',
|
|
35
|
+
|
|
31
36
|
helmLoneDeoxyribose = 'helmLoneDeoxyribose',
|
|
32
37
|
helmLoneRibose = 'helmLoneRibose',
|
|
33
38
|
helmLonePhosphorus = 'helmLonePhosphorus',
|
|
@@ -96,6 +101,22 @@ F/K/P//Q//S/E/Y/V
|
|
|
96
101
|
PEPTIDE1{F.W.*.P.H.*.E.Y.Y}$$$
|
|
97
102
|
PEPTIDE1{F.Y.N.R.Q.W.Y.V.*}$$$
|
|
98
103
|
PEPTIDE1{F.K.P.*.Q.*.S.E.Y.V}$$$
|
|
104
|
+
`,
|
|
105
|
+
|
|
106
|
+
fastaUn: `seq
|
|
107
|
+
[meI][hHis][Aca]NT[dE][Thr_PO3H2][Aca]D
|
|
108
|
+
[meI][hHis][Aca][Cys_SEt]T[dK][Thr_PO3H2][Aca][Tyr_PO3H2]
|
|
109
|
+
[Lys_Boc][hHis][Aca][Cys_SEt]T[dK][Thr_PO3H2][Aca][Tyr_PO3H2]
|
|
110
|
+
`,
|
|
111
|
+
separatorUn: `seq
|
|
112
|
+
meI-hHis-Aca-N-T-dE-Thr_PO3H2-Aca-D
|
|
113
|
+
meI-hHis-Aca-Cys_SEt-T-dK-Thr_PO3H2-Aca-Tyr_PO3H2
|
|
114
|
+
Lys_Boc-hHis-Aca-Cys_SEt-T-dK-Thr_PO3H2-Aca-Tyr_PO3H2
|
|
115
|
+
`,
|
|
116
|
+
helmUn: `seq
|
|
117
|
+
PEPTIDE1{meI.hHis.Aca.N.T.dE.Thr_PO3H2.Aca.D}$$$
|
|
118
|
+
PEPTIDE1{meI.hHis.Aca.Cys_SEt.T.dK.Thr_PO3H2.Aca.Tyr_PO3H2}$$$
|
|
119
|
+
PEPTIDE1{Lys_Boc.hHis.Aca.Cys_SEt.T.dK.Thr_PO3H2.Aca.Tyr_PO3H2}$$$
|
|
99
120
|
`,
|
|
100
121
|
helmLoneDeoxyribose: `seq
|
|
101
122
|
DNA1{D(A).D(C).D(G).D(T).D(C)}$$$
|
|
@@ -114,23 +135,17 @@ RNA1{P.R(U)P.R(U)P.R(C)P.R(A)P.R(A)P.R(C)P.P.P}$$$
|
|
|
114
135
|
`,
|
|
115
136
|
};
|
|
116
137
|
|
|
117
|
-
const _csvDfs: { [key: string]: Promise<DG.DataFrame> } = {};
|
|
118
|
-
|
|
119
138
|
/** Also detects semantic types
|
|
120
139
|
* @param {string} key
|
|
121
140
|
* @return {Promise<DG.DataFrame>}
|
|
122
141
|
*/
|
|
123
|
-
function readCsv(key: string): Promise<DG.DataFrame> {
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
})();
|
|
131
|
-
}
|
|
132
|
-
return _csvDfs[key];
|
|
133
|
-
};
|
|
142
|
+
async function readCsv(key: string): Promise<DG.DataFrame> {
|
|
143
|
+
// Always recreate test data frame from CSV for reproducible detector behavior in tests.
|
|
144
|
+
const csv: string = _csvTxts[key];
|
|
145
|
+
const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
|
|
146
|
+
await grok.data.detectSemanticTypes(df);
|
|
147
|
+
return df;
|
|
148
|
+
}
|
|
134
149
|
|
|
135
150
|
function converter(tgtNotation: NOTATION, tgtSeparator: string | null = null): ConverterFunc {
|
|
136
151
|
if (tgtNotation === NOTATION.SEPARATOR && !tgtSeparator)
|
|
@@ -142,9 +157,9 @@ RNA1{P.R(U)P.R(U)P.R(C)P.R(A)P.R(A)P.R(C)P.P.P}$$$
|
|
|
142
157
|
expect(resCol.getTag('units'), tgtNotation);
|
|
143
158
|
return resCol;
|
|
144
159
|
};
|
|
145
|
-
}
|
|
160
|
+
}
|
|
146
161
|
|
|
147
|
-
async function _testConvert(srcKey:
|
|
162
|
+
async function _testConvert(srcKey: Samples, converter: ConverterFunc, tgtKey: Samples) {
|
|
148
163
|
const srcDf: DG.DataFrame = await readCsv(srcKey);
|
|
149
164
|
const srcCol: DG.Column = srcDf.getCol('seq');
|
|
150
165
|
|
|
@@ -173,6 +188,9 @@ RNA1{P.R(U)P.R(U)P.R(C)P.R(A)P.R(A)P.R(C)P.P.P}$$$
|
|
|
173
188
|
test('testFastaGapsToSeparator', async () => {
|
|
174
189
|
await _testConvert(Samples.fastaGaps, converter(NOTATION.SEPARATOR, '/'), Samples.separatorGaps);
|
|
175
190
|
});
|
|
191
|
+
test('testFastaUnToSeparator', async () => {
|
|
192
|
+
await _testConvert(Samples.fastaUn, converter(NOTATION.SEPARATOR, '-'), Samples.separatorUn);
|
|
193
|
+
});
|
|
176
194
|
|
|
177
195
|
// fasta -> helm
|
|
178
196
|
test('testFastaPtToHelm', async () => {
|
|
@@ -187,6 +205,10 @@ RNA1{P.R(U)P.R(U)P.R(C)P.R(A)P.R(A)P.R(C)P.P.P}$$$
|
|
|
187
205
|
test('testFastaGapsToHelm', async () => {
|
|
188
206
|
await _testConvert(Samples.fastaGaps, converter(NOTATION.HELM), Samples.helmGaps);
|
|
189
207
|
});
|
|
208
|
+
// TODO: testFastaUnToHelm
|
|
209
|
+
// test('testFastaUnToHelm', async () => {
|
|
210
|
+
// await _testConvert(Samples.fastaUn, converter(NOTATION.HELM), Samples.helmUn);
|
|
211
|
+
// });
|
|
190
212
|
|
|
191
213
|
|
|
192
214
|
// SEPARATOR tests
|
|
@@ -203,6 +225,9 @@ RNA1{P.R(U)P.R(U)P.R(C)P.R(A)P.R(A)P.R(C)P.P.P}$$$
|
|
|
203
225
|
test('testSeparatorGapsToFasta', async () => {
|
|
204
226
|
await _testConvert(Samples.separatorGaps, converter(NOTATION.FASTA), Samples.fastaGaps);
|
|
205
227
|
});
|
|
228
|
+
test('testSeparatorUnToFasta', async () => {
|
|
229
|
+
await _testConvert(Samples.separatorUn, converter(NOTATION.FASTA), Samples.fastaUn);
|
|
230
|
+
});
|
|
206
231
|
|
|
207
232
|
// separator -> helm
|
|
208
233
|
test('testSeparatorPtToHelm', async () => {
|
|
@@ -217,6 +242,10 @@ RNA1{P.R(U)P.R(U)P.R(C)P.R(A)P.R(A)P.R(C)P.P.P}$$$
|
|
|
217
242
|
test('testSeparatorGapsToHelm', async () => {
|
|
218
243
|
await _testConvert(Samples.separatorGaps, converter(NOTATION.HELM), Samples.helmGaps);
|
|
219
244
|
});
|
|
245
|
+
// TODO: testSeparatorUnToHelm
|
|
246
|
+
// test('testSeparatorUnToHelm', async () => {
|
|
247
|
+
// await _testConvert(Samples.separatorUn, converter(NOTATION.HELM), Samples.helmUn);
|
|
248
|
+
// });
|
|
220
249
|
|
|
221
250
|
|
|
222
251
|
// HELM tests
|
|
@@ -230,6 +259,9 @@ RNA1{P.R(U)P.R(U)P.R(C)P.R(A)P.R(A)P.R(C)P.P.P}$$$
|
|
|
230
259
|
test('testHelmPtToFasta', async () => {
|
|
231
260
|
await _testConvert(Samples.helmPt, converter(NOTATION.FASTA), Samples.fastaPt);
|
|
232
261
|
});
|
|
262
|
+
test('testHelmUnToFasta', async () => {
|
|
263
|
+
await _testConvert(Samples.helmUn, converter(NOTATION.FASTA), Samples.fastaUn);
|
|
264
|
+
});
|
|
233
265
|
|
|
234
266
|
// helm -> separator
|
|
235
267
|
test('testHelmDnaToSeparator', async () => {
|
|
@@ -241,6 +273,9 @@ RNA1{P.R(U)P.R(U)P.R(C)P.R(A)P.R(A)P.R(C)P.P.P}$$$
|
|
|
241
273
|
test('testHelmPtToSeparator', async () => {
|
|
242
274
|
await _testConvert(Samples.helmPt, converter(NOTATION.SEPARATOR, '-'), Samples.separatorPt);
|
|
243
275
|
});
|
|
276
|
+
test('testHelmUnToSeparator', async () => {
|
|
277
|
+
await _testConvert(Samples.helmUn, converter(NOTATION.SEPARATOR, '-'), Samples.separatorUn);
|
|
278
|
+
});
|
|
244
279
|
|
|
245
280
|
// helm miscellaneous
|
|
246
281
|
test('testHelmLoneRibose', async () => {
|
|
@@ -3,8 +3,9 @@ import * as ui from 'datagrok-api/ui';
|
|
|
3
3
|
import * as DG from 'datagrok-api/dg';
|
|
4
4
|
|
|
5
5
|
import {after, before, category, test, expect, expectObject} from '@datagrok-libraries/utils/src/test';
|
|
6
|
-
import {ALPHABET, getAlphabet, NOTATION
|
|
6
|
+
import {ALPHABET, getAlphabet, NOTATION} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
7
7
|
import {Column} from 'datagrok-api/dg';
|
|
8
|
+
import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
8
9
|
|
|
9
10
|
category('detectorsBenchmark', () => {
|
|
10
11
|
|
|
@@ -40,7 +41,7 @@ category('detectorsBenchmark', () => {
|
|
|
40
41
|
|
|
41
42
|
test('separatorDnaShorts50Few50', async () => {
|
|
42
43
|
const et: number = await detectMacromoleculeBenchmark(10, NOTATION.SEPARATOR, ALPHABET.DNA, 50, 50, '/');
|
|
43
|
-
});
|
|
44
|
+
}, {skipReason: '#1192'});
|
|
44
45
|
|
|
45
46
|
test('separatorDnaShorts50Many1E6', async () => {
|
|
46
47
|
const et: number = await detectMacromoleculeBenchmark(10, NOTATION.SEPARATOR, ALPHABET.DNA, 50, 1E6, '/');
|