@datagrok/bio 2.1.11 → 2.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +11 -12
- package/css/helm.css +10 -0
- package/detectors.js +83 -59
- package/dist/package-test.js +2 -68651
- package/dist/package-test.js.map +1 -0
- package/dist/package.js +2 -66040
- package/dist/package.js.map +1 -0
- package/dockerfiles/Dockerfile +86 -0
- package/files/icons/composition-analysis.svg +17 -0
- package/files/icons/sequence-diversity-viewer.svg +4 -0
- package/files/icons/sequence-similarity-viewer.svg +4 -0
- package/files/icons/vdregions-viewer.svg +22 -0
- package/files/icons/weblogo-viewer.svg +7 -0
- package/files/tests/testUrl.csv +11 -0
- package/files/tests/toAtomicLevelTest.csv +4 -0
- package/package.json +29 -32
- package/src/analysis/sequence-activity-cliffs.ts +15 -13
- package/src/analysis/sequence-diversity-viewer.ts +3 -2
- package/src/analysis/sequence-search-base-viewer.ts +4 -2
- package/src/analysis/sequence-similarity-viewer.ts +4 -4
- package/src/analysis/sequence-space.ts +2 -1
- package/src/calculations/monomerLevelMols.ts +6 -6
- package/src/package-test.ts +9 -2
- package/src/package.ts +230 -145
- package/src/substructure-search/substructure-search.ts +25 -22
- package/src/tests/Palettes-test.ts +9 -9
- package/src/tests/WebLogo-positions-test.ts +131 -68
- package/src/tests/_first-tests.ts +9 -0
- package/src/tests/activity-cliffs-tests.ts +8 -7
- package/src/tests/activity-cliffs-utils.ts +17 -9
- package/src/tests/bio-tests.ts +30 -21
- package/src/tests/checkInputColumn-tests.ts +17 -17
- package/src/tests/converters-test.ts +81 -46
- package/src/tests/detectors-benchmark-tests.ts +17 -17
- package/src/tests/detectors-tests.ts +190 -178
- package/src/tests/fasta-export-tests.ts +2 -3
- package/src/tests/monomer-libraries-tests.ts +34 -0
- package/src/tests/pepsea-tests.ts +21 -0
- package/src/tests/renderers-test.ts +33 -29
- package/src/tests/sequence-space-test.ts +6 -4
- package/src/tests/similarity-diversity-tests.ts +4 -4
- package/src/tests/splitters-test.ts +6 -7
- package/src/tests/substructure-filters-tests.ts +23 -1
- package/src/tests/utils/sequences-generators.ts +7 -7
- package/src/tests/utils.ts +2 -1
- package/src/tests/viewers.ts +16 -0
- package/src/utils/cell-renderer.ts +116 -54
- package/src/utils/constants.ts +7 -6
- package/src/utils/convert.ts +17 -11
- package/src/utils/monomer-lib.ts +174 -0
- package/src/utils/multiple-sequence-alignment.ts +49 -26
- package/src/utils/pepsea.ts +78 -0
- package/src/utils/save-as-fasta.ts +9 -8
- package/src/utils/ui-utils.ts +15 -3
- package/src/viewers/vd-regions-viewer.ts +125 -83
- package/src/viewers/web-logo-viewer.ts +1031 -0
- package/src/widgets/bio-substructure-filter.ts +38 -24
- package/tsconfig.json +71 -72
- package/webpack.config.js +4 -11
- package/dist/vendors-node_modules_datagrok-libraries_ml_src_workers_dimensionality-reducer_js.js +0 -8988
- package/jest.config.js +0 -33
- package/src/__jest__/remote.test.ts +0 -77
- package/src/__jest__/test-node.ts +0 -98
- package/test-Bio-91c83d8913ff-bb573307.html +0 -392
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
import * as grok from 'datagrok-api/grok';
|
|
2
2
|
import * as ui from 'datagrok-api/ui';
|
|
3
3
|
import * as DG from 'datagrok-api/dg';
|
|
4
|
-
import * as bio from '@datagrok-libraries/bio';
|
|
5
4
|
|
|
6
5
|
|
|
7
6
|
import {after, before, category, test, expect, expectArray} from '@datagrok-libraries/utils/src/test';
|
|
8
7
|
|
|
9
8
|
import {checkInputColumn, multipleSequenceAlignmentAny} from '../package';
|
|
9
|
+
import {ALPHABET, NOTATION, TAGS as bioTAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
10
10
|
|
|
11
11
|
category('checkInputColumn', () => {
|
|
12
12
|
const csv = `seq
|
|
@@ -24,13 +24,13 @@ seq4`;
|
|
|
24
24
|
const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
|
|
25
25
|
const col: DG.Column = df.getCol('seq');
|
|
26
26
|
col.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
27
|
-
col.setTag(DG.TAGS.UNITS,
|
|
28
|
-
col.setTag(
|
|
29
|
-
col.setTag(
|
|
27
|
+
col.setTag(DG.TAGS.UNITS, NOTATION.FASTA);
|
|
28
|
+
col.setTag(bioTAGS.alphabet, ALPHABET.DNA);
|
|
29
|
+
col.setTag(bioTAGS.aligned, 'SEQ');
|
|
30
30
|
|
|
31
31
|
const [res, msg]: [boolean, string] = checkInputColumn(
|
|
32
|
-
col, 'Test', [
|
|
33
|
-
[
|
|
32
|
+
col, 'Test', [NOTATION.FASTA],
|
|
33
|
+
[ALPHABET.DNA, ALPHABET.RNA, ALPHABET.PT]);
|
|
34
34
|
|
|
35
35
|
expect(res, true);
|
|
36
36
|
});
|
|
@@ -39,13 +39,13 @@ seq4`;
|
|
|
39
39
|
const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
|
|
40
40
|
const col: DG.Column = df.getCol('seq');
|
|
41
41
|
col.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
42
|
-
col.setTag(DG.TAGS.UNITS,
|
|
42
|
+
col.setTag(DG.TAGS.UNITS, NOTATION.HELM);
|
|
43
43
|
// col.setTag(bio.TAGS.alphabetSize, '11');
|
|
44
|
-
col.setTag(
|
|
44
|
+
col.setTag(bioTAGS.alphabetIsMultichar, 'true');
|
|
45
45
|
|
|
46
46
|
const [res, msg]: [boolean, string] = checkInputColumn(
|
|
47
|
-
col, 'Test', [
|
|
48
|
-
[
|
|
47
|
+
col, 'Test', [NOTATION.FASTA],
|
|
48
|
+
[ALPHABET.DNA, ALPHABET.RNA, ALPHABET.PT]);
|
|
49
49
|
|
|
50
50
|
expect(res, false);
|
|
51
51
|
});
|
|
@@ -54,15 +54,15 @@ seq4`;
|
|
|
54
54
|
const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
|
|
55
55
|
const col: DG.Column = df.getCol('seq');
|
|
56
56
|
col.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
57
|
-
col.setTag(DG.TAGS.UNITS,
|
|
58
|
-
col.setTag(
|
|
59
|
-
col.setTag(
|
|
60
|
-
col.setTag(
|
|
61
|
-
col.setTag(
|
|
57
|
+
col.setTag(DG.TAGS.UNITS, NOTATION.FASTA);
|
|
58
|
+
col.setTag(bioTAGS.alphabet, 'UN');
|
|
59
|
+
col.setTag(bioTAGS.alphabetSize, '11');
|
|
60
|
+
col.setTag(bioTAGS.alphabetIsMultichar, 'true');
|
|
61
|
+
col.setTag(bioTAGS.aligned, 'SEQ');
|
|
62
62
|
|
|
63
63
|
const [res, msg]: [boolean, string] = checkInputColumn(
|
|
64
|
-
col, 'Test', [
|
|
65
|
-
[
|
|
64
|
+
col, 'Test', [NOTATION.FASTA],
|
|
65
|
+
[ALPHABET.DNA, ALPHABET.RNA, ALPHABET.PT]);
|
|
66
66
|
|
|
67
67
|
expect(res, false);
|
|
68
68
|
});
|
|
@@ -1,11 +1,12 @@
|
|
|
1
1
|
import * as DG from 'datagrok-api/dg';
|
|
2
|
-
import * as ui from 'datagrok-api/ui';
|
|
3
2
|
import * as grok from 'datagrok-api/grok';
|
|
4
|
-
import * as bio from '@datagrok-libraries/bio';
|
|
5
3
|
|
|
6
4
|
import {category, expect, expectArray, test} from '@datagrok-libraries/utils/src/test';
|
|
7
5
|
|
|
8
6
|
import {ConverterFunc} from './types';
|
|
7
|
+
import {NOTATION} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
8
|
+
import {NotationConverter} from '@datagrok-libraries/bio/src/utils/notation-converter';
|
|
9
|
+
import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
9
10
|
|
|
10
11
|
// import {mmSemType} from '../const';
|
|
11
12
|
// import {importFasta} from '../package';
|
|
@@ -28,6 +29,10 @@ category('converters', () => {
|
|
|
28
29
|
separatorGaps = 'separatorGaps',
|
|
29
30
|
helmGaps = 'helmGaps',
|
|
30
31
|
|
|
32
|
+
fastaUn = 'fastaUn',
|
|
33
|
+
separatorUn = 'separatorUn',
|
|
34
|
+
helmUn = 'helmUn',
|
|
35
|
+
|
|
31
36
|
helmLoneDeoxyribose = 'helmLoneDeoxyribose',
|
|
32
37
|
helmLoneRibose = 'helmLoneRibose',
|
|
33
38
|
helmLonePhosphorus = 'helmLonePhosphorus',
|
|
@@ -96,6 +101,22 @@ F/K/P//Q//S/E/Y/V
|
|
|
96
101
|
PEPTIDE1{F.W.*.P.H.*.E.Y.Y}$$$
|
|
97
102
|
PEPTIDE1{F.Y.N.R.Q.W.Y.V.*}$$$
|
|
98
103
|
PEPTIDE1{F.K.P.*.Q.*.S.E.Y.V}$$$
|
|
104
|
+
`,
|
|
105
|
+
|
|
106
|
+
fastaUn: `seq
|
|
107
|
+
[meI][hHis][Aca]NT[dE][Thr_PO3H2][Aca]D
|
|
108
|
+
[meI][hHis][Aca][Cys_SEt]T[dK][Thr_PO3H2][Aca][Tyr_PO3H2]
|
|
109
|
+
[Lys_Boc][hHis][Aca][Cys_SEt]T[dK][Thr_PO3H2][Aca][Tyr_PO3H2]
|
|
110
|
+
`,
|
|
111
|
+
separatorUn: `seq
|
|
112
|
+
meI-hHis-Aca-N-T-dE-Thr_PO3H2-Aca-D
|
|
113
|
+
meI-hHis-Aca-Cys_SEt-T-dK-Thr_PO3H2-Aca-Tyr_PO3H2
|
|
114
|
+
Lys_Boc-hHis-Aca-Cys_SEt-T-dK-Thr_PO3H2-Aca-Tyr_PO3H2
|
|
115
|
+
`,
|
|
116
|
+
helmUn: `seq
|
|
117
|
+
PEPTIDE1{meI.hHis.Aca.N.T.dE.Thr_PO3H2.Aca.D}$$$
|
|
118
|
+
PEPTIDE1{meI.hHis.Aca.Cys_SEt.T.dK.Thr_PO3H2.Aca.Tyr_PO3H2}$$$
|
|
119
|
+
PEPTIDE1{Lys_Boc.hHis.Aca.Cys_SEt.T.dK.Thr_PO3H2.Aca.Tyr_PO3H2}$$$
|
|
99
120
|
`,
|
|
100
121
|
helmLoneDeoxyribose: `seq
|
|
101
122
|
DNA1{D(A).D(C).D(G).D(T).D(C)}$$$
|
|
@@ -114,37 +135,31 @@ RNA1{P.R(U)P.R(U)P.R(C)P.R(A)P.R(A)P.R(C)P.P.P}$$$
|
|
|
114
135
|
`,
|
|
115
136
|
};
|
|
116
137
|
|
|
117
|
-
const _csvDfs: { [key: string]: Promise<DG.DataFrame> } = {};
|
|
118
|
-
|
|
119
138
|
/** Also detects semantic types
|
|
120
139
|
* @param {string} key
|
|
121
140
|
* @return {Promise<DG.DataFrame>}
|
|
122
141
|
*/
|
|
123
|
-
function readCsv(key: string): Promise<DG.DataFrame> {
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
})();
|
|
131
|
-
}
|
|
132
|
-
return _csvDfs[key];
|
|
133
|
-
};
|
|
142
|
+
async function readCsv(key: string): Promise<DG.DataFrame> {
|
|
143
|
+
// Always recreate test data frame from CSV for reproducible detector behavior in tests.
|
|
144
|
+
const csv: string = _csvTxts[key];
|
|
145
|
+
const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
|
|
146
|
+
await grok.data.detectSemanticTypes(df);
|
|
147
|
+
return df;
|
|
148
|
+
}
|
|
134
149
|
|
|
135
|
-
function converter(tgtNotation:
|
|
136
|
-
if (tgtNotation ===
|
|
150
|
+
function converter(tgtNotation: NOTATION, tgtSeparator: string | null = null): ConverterFunc {
|
|
151
|
+
if (tgtNotation === NOTATION.SEPARATOR && !tgtSeparator)
|
|
137
152
|
throw new Error(`Argument 'separator' is missed for notation '${tgtNotation.toString()}'.`);
|
|
138
153
|
|
|
139
154
|
return function(srcCol: DG.Column): DG.Column {
|
|
140
|
-
const converter = new
|
|
155
|
+
const converter = new NotationConverter(srcCol);
|
|
141
156
|
const resCol = converter.convert(tgtNotation, tgtSeparator);
|
|
142
157
|
expect(resCol.getTag('units'), tgtNotation);
|
|
143
158
|
return resCol;
|
|
144
159
|
};
|
|
145
|
-
}
|
|
160
|
+
}
|
|
146
161
|
|
|
147
|
-
async function _testConvert(srcKey:
|
|
162
|
+
async function _testConvert(srcKey: Samples, converter: ConverterFunc, tgtKey: Samples) {
|
|
148
163
|
const srcDf: DG.DataFrame = await readCsv(srcKey);
|
|
149
164
|
const srcCol: DG.Column = srcDf.getCol('seq');
|
|
150
165
|
|
|
@@ -156,100 +171,120 @@ RNA1{P.R(U)P.R(U)P.R(C)P.R(A)P.R(A)P.R(C)P.P.P}$$$
|
|
|
156
171
|
const tgtCol: DG.Column = tgtDf.getCol('seq');
|
|
157
172
|
|
|
158
173
|
expectArray(resCol.toList(), tgtCol.toList());
|
|
159
|
-
const uh:
|
|
174
|
+
const uh: UnitsHandler = new UnitsHandler(resCol);
|
|
160
175
|
}
|
|
161
176
|
|
|
162
177
|
// FASTA tests
|
|
163
178
|
// fasta -> separator
|
|
164
179
|
test('testFastaPtToSeparator', async () => {
|
|
165
|
-
await _testConvert(Samples.fastaPt, converter(
|
|
180
|
+
await _testConvert(Samples.fastaPt, converter(NOTATION.SEPARATOR, '-'), Samples.separatorPt);
|
|
166
181
|
});
|
|
167
182
|
test('testFastaDnaToSeparator', async () => {
|
|
168
|
-
await _testConvert(Samples.fastaDna, converter(
|
|
183
|
+
await _testConvert(Samples.fastaDna, converter(NOTATION.SEPARATOR, '/'), Samples.separatorDna);
|
|
169
184
|
});
|
|
170
185
|
test('testFastaRnaToSeparator', async () => {
|
|
171
|
-
await _testConvert(Samples.fastaRna, converter(
|
|
186
|
+
await _testConvert(Samples.fastaRna, converter(NOTATION.SEPARATOR, '*'), Samples.separatorRna);
|
|
172
187
|
});
|
|
173
188
|
test('testFastaGapsToSeparator', async () => {
|
|
174
|
-
await _testConvert(Samples.fastaGaps, converter(
|
|
189
|
+
await _testConvert(Samples.fastaGaps, converter(NOTATION.SEPARATOR, '/'), Samples.separatorGaps);
|
|
190
|
+
});
|
|
191
|
+
test('testFastaUnToSeparator', async () => {
|
|
192
|
+
await _testConvert(Samples.fastaUn, converter(NOTATION.SEPARATOR, '-'), Samples.separatorUn);
|
|
175
193
|
});
|
|
176
194
|
|
|
177
195
|
// fasta -> helm
|
|
178
196
|
test('testFastaPtToHelm', async () => {
|
|
179
|
-
await _testConvert(Samples.fastaPt, converter(
|
|
197
|
+
await _testConvert(Samples.fastaPt, converter(NOTATION.HELM), Samples.helmPt);
|
|
180
198
|
});
|
|
181
199
|
test('testFastaDnaToHelm', async () => {
|
|
182
|
-
await _testConvert(Samples.fastaDna, converter(
|
|
200
|
+
await _testConvert(Samples.fastaDna, converter(NOTATION.HELM), Samples.helmDna);
|
|
183
201
|
});
|
|
184
202
|
test('testFastaRnaToHelm', async () => {
|
|
185
|
-
await _testConvert(Samples.fastaRna, converter(
|
|
203
|
+
await _testConvert(Samples.fastaRna, converter(NOTATION.HELM), Samples.helmRna);
|
|
186
204
|
});
|
|
187
205
|
test('testFastaGapsToHelm', async () => {
|
|
188
|
-
await _testConvert(Samples.fastaGaps, converter(
|
|
206
|
+
await _testConvert(Samples.fastaGaps, converter(NOTATION.HELM), Samples.helmGaps);
|
|
189
207
|
});
|
|
208
|
+
// TODO: testFastaUnToHelm
|
|
209
|
+
// test('testFastaUnToHelm', async () => {
|
|
210
|
+
// await _testConvert(Samples.fastaUn, converter(NOTATION.HELM), Samples.helmUn);
|
|
211
|
+
// });
|
|
190
212
|
|
|
191
213
|
|
|
192
214
|
// SEPARATOR tests
|
|
193
215
|
// separator -> fasta
|
|
194
216
|
test('testSeparatorPtToFasta', async () => {
|
|
195
|
-
await _testConvert(Samples.separatorPt, converter(
|
|
217
|
+
await _testConvert(Samples.separatorPt, converter(NOTATION.FASTA), Samples.fastaPt);
|
|
196
218
|
});
|
|
197
219
|
test('testSeparatorDnaToFasta', async () => {
|
|
198
|
-
await _testConvert(Samples.separatorDna, converter(
|
|
220
|
+
await _testConvert(Samples.separatorDna, converter(NOTATION.FASTA), Samples.fastaDna);
|
|
199
221
|
});
|
|
200
222
|
test('testSeparatorRnaToFasta', async () => {
|
|
201
|
-
await _testConvert(Samples.separatorRna, converter(
|
|
223
|
+
await _testConvert(Samples.separatorRna, converter(NOTATION.FASTA), Samples.fastaRna);
|
|
202
224
|
});
|
|
203
225
|
test('testSeparatorGapsToFasta', async () => {
|
|
204
|
-
await _testConvert(Samples.separatorGaps, converter(
|
|
226
|
+
await _testConvert(Samples.separatorGaps, converter(NOTATION.FASTA), Samples.fastaGaps);
|
|
227
|
+
});
|
|
228
|
+
test('testSeparatorUnToFasta', async () => {
|
|
229
|
+
await _testConvert(Samples.separatorUn, converter(NOTATION.FASTA), Samples.fastaUn);
|
|
205
230
|
});
|
|
206
231
|
|
|
207
232
|
// separator -> helm
|
|
208
233
|
test('testSeparatorPtToHelm', async () => {
|
|
209
|
-
await _testConvert(Samples.separatorPt, converter(
|
|
234
|
+
await _testConvert(Samples.separatorPt, converter(NOTATION.HELM), Samples.helmPt);
|
|
210
235
|
});
|
|
211
236
|
test('testSeparatorDnaToHelm', async () => {
|
|
212
|
-
await _testConvert(Samples.separatorDna, converter(
|
|
237
|
+
await _testConvert(Samples.separatorDna, converter(NOTATION.HELM), Samples.helmDna);
|
|
213
238
|
});
|
|
214
239
|
test('testSeparatorRnaToHelm', async () => {
|
|
215
|
-
await _testConvert(Samples.separatorRna, converter(
|
|
240
|
+
await _testConvert(Samples.separatorRna, converter(NOTATION.HELM), Samples.helmRna);
|
|
216
241
|
});
|
|
217
242
|
test('testSeparatorGapsToHelm', async () => {
|
|
218
|
-
await _testConvert(Samples.separatorGaps, converter(
|
|
243
|
+
await _testConvert(Samples.separatorGaps, converter(NOTATION.HELM), Samples.helmGaps);
|
|
219
244
|
});
|
|
245
|
+
// TODO: testSeparatorUnToHelm
|
|
246
|
+
// test('testSeparatorUnToHelm', async () => {
|
|
247
|
+
// await _testConvert(Samples.separatorUn, converter(NOTATION.HELM), Samples.helmUn);
|
|
248
|
+
// });
|
|
220
249
|
|
|
221
250
|
|
|
222
251
|
// HELM tests
|
|
223
252
|
// helm -> fasta
|
|
224
253
|
test('testHelmDnaToFasta', async () => {
|
|
225
|
-
await _testConvert(Samples.helmDna, converter(
|
|
254
|
+
await _testConvert(Samples.helmDna, converter(NOTATION.FASTA), Samples.fastaDna);
|
|
226
255
|
});
|
|
227
256
|
test('testHelmRnaToFasta', async () => {
|
|
228
|
-
await _testConvert(Samples.helmRna, converter(
|
|
257
|
+
await _testConvert(Samples.helmRna, converter(NOTATION.FASTA), Samples.fastaRna);
|
|
229
258
|
});
|
|
230
259
|
test('testHelmPtToFasta', async () => {
|
|
231
|
-
await _testConvert(Samples.helmPt, converter(
|
|
260
|
+
await _testConvert(Samples.helmPt, converter(NOTATION.FASTA), Samples.fastaPt);
|
|
261
|
+
});
|
|
262
|
+
test('testHelmUnToFasta', async () => {
|
|
263
|
+
await _testConvert(Samples.helmUn, converter(NOTATION.FASTA), Samples.fastaUn);
|
|
232
264
|
});
|
|
233
265
|
|
|
234
266
|
// helm -> separator
|
|
235
267
|
test('testHelmDnaToSeparator', async () => {
|
|
236
|
-
await _testConvert(Samples.helmDna, converter(
|
|
268
|
+
await _testConvert(Samples.helmDna, converter(NOTATION.SEPARATOR, '/'), Samples.separatorDna);
|
|
237
269
|
});
|
|
238
270
|
test('testHelmRnaToSeparator', async () => {
|
|
239
|
-
await _testConvert(Samples.helmRna, converter(
|
|
271
|
+
await _testConvert(Samples.helmRna, converter(NOTATION.SEPARATOR, '*'), Samples.separatorRna);
|
|
240
272
|
});
|
|
241
273
|
test('testHelmPtToSeparator', async () => {
|
|
242
|
-
await _testConvert(Samples.helmPt, converter(
|
|
274
|
+
await _testConvert(Samples.helmPt, converter(NOTATION.SEPARATOR, '-'), Samples.separatorPt);
|
|
275
|
+
});
|
|
276
|
+
test('testHelmUnToSeparator', async () => {
|
|
277
|
+
await _testConvert(Samples.helmUn, converter(NOTATION.SEPARATOR, '-'), Samples.separatorUn);
|
|
243
278
|
});
|
|
244
279
|
|
|
245
280
|
// helm miscellaneous
|
|
246
281
|
test('testHelmLoneRibose', async () => {
|
|
247
|
-
await _testConvert(Samples.helmLoneRibose, converter(
|
|
282
|
+
await _testConvert(Samples.helmLoneRibose, converter(NOTATION.FASTA), Samples.fastaRna);
|
|
248
283
|
});
|
|
249
284
|
test('testHelmLoneDeoxyribose', async () => {
|
|
250
|
-
await _testConvert(Samples.helmLoneDeoxyribose, converter(
|
|
285
|
+
await _testConvert(Samples.helmLoneDeoxyribose, converter(NOTATION.SEPARATOR, '/'), Samples.separatorDna);
|
|
251
286
|
});
|
|
252
287
|
test('testHelmLonePhosphorus', async () => {
|
|
253
|
-
await _testConvert(Samples.helmLonePhosphorus, converter(
|
|
288
|
+
await _testConvert(Samples.helmLonePhosphorus, converter(NOTATION.FASTA), Samples.fastaRna);
|
|
254
289
|
});
|
|
255
290
|
});
|
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
import * as grok from 'datagrok-api/grok';
|
|
2
2
|
import * as ui from 'datagrok-api/ui';
|
|
3
3
|
import * as DG from 'datagrok-api/dg';
|
|
4
|
-
import * as bio from '@datagrok-libraries/bio';
|
|
5
4
|
|
|
6
5
|
import {after, before, category, test, expect, expectObject} from '@datagrok-libraries/utils/src/test';
|
|
7
|
-
import {
|
|
6
|
+
import {ALPHABET, getAlphabet, NOTATION} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
8
7
|
import {Column} from 'datagrok-api/dg';
|
|
8
|
+
import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
9
9
|
|
|
10
10
|
category('detectorsBenchmark', () => {
|
|
11
11
|
|
|
@@ -23,42 +23,42 @@ category('detectorsBenchmark', () => {
|
|
|
23
23
|
// -- fasta --
|
|
24
24
|
|
|
25
25
|
test('fastaDnaShorts50Few50', async () => {
|
|
26
|
-
const et: number = await detectMacromoleculeBenchmark(10,
|
|
26
|
+
const et: number = await detectMacromoleculeBenchmark(10, NOTATION.FASTA, ALPHABET.DNA, 50, 50);
|
|
27
27
|
},
|
|
28
28
|
{skipReason: '#1192'});
|
|
29
29
|
|
|
30
30
|
test('fastaDnaShorts50Many1E6', async () => {
|
|
31
|
-
const et: number = await detectMacromoleculeBenchmark(10,
|
|
31
|
+
const et: number = await detectMacromoleculeBenchmark(10, NOTATION.FASTA, ALPHABET.DNA, 50, 1E6);
|
|
32
32
|
},
|
|
33
33
|
{skipReason: '#1192'});
|
|
34
34
|
|
|
35
35
|
test('fastaDnaLong1e6Few50', async () => {
|
|
36
|
-
const et: number = await detectMacromoleculeBenchmark(10,
|
|
36
|
+
const et: number = await detectMacromoleculeBenchmark(10, NOTATION.FASTA, ALPHABET.DNA, 1E6, 50);
|
|
37
37
|
},
|
|
38
38
|
{skipReason: '#1192'});
|
|
39
39
|
|
|
40
40
|
// -- separator --
|
|
41
41
|
|
|
42
42
|
test('separatorDnaShorts50Few50', async () => {
|
|
43
|
-
const et: number = await detectMacromoleculeBenchmark(10,
|
|
44
|
-
});
|
|
43
|
+
const et: number = await detectMacromoleculeBenchmark(10, NOTATION.SEPARATOR, ALPHABET.DNA, 50, 50, '/');
|
|
44
|
+
}, {skipReason: '#1192'});
|
|
45
45
|
|
|
46
46
|
test('separatorDnaShorts50Many1E6', async () => {
|
|
47
|
-
const et: number = await detectMacromoleculeBenchmark(10,
|
|
47
|
+
const et: number = await detectMacromoleculeBenchmark(10, NOTATION.SEPARATOR, ALPHABET.DNA, 50, 1E6, '/');
|
|
48
48
|
},
|
|
49
49
|
{ /* skipReason: 'slow transmit large dataset to detector' */});
|
|
50
50
|
|
|
51
51
|
test('separatorDnaLong1e6Few50', async () => {
|
|
52
|
-
const et: number = await detectMacromoleculeBenchmark(10,
|
|
52
|
+
const et: number = await detectMacromoleculeBenchmark(10, NOTATION.SEPARATOR, ALPHABET.DNA, 1E6, 50, '/');
|
|
53
53
|
},
|
|
54
54
|
{skipReason: '#1192'});
|
|
55
55
|
|
|
56
56
|
async function detectMacromoleculeBenchmark(
|
|
57
|
-
maxET: number, notation:
|
|
57
|
+
maxET: number, notation: NOTATION, alphabet: ALPHABET, length: number, count: number, separator?: string
|
|
58
58
|
): Promise<number> {
|
|
59
59
|
return await benchmark<DG.FuncCall, DG.Column>(10,
|
|
60
60
|
(): DG.FuncCall => {
|
|
61
|
-
const col: DG.Column = generate(notation, [...
|
|
61
|
+
const col: DG.Column = generate(notation, [...getAlphabet(alphabet)], length, count, separator);
|
|
62
62
|
const funcCall: DG.FuncCall = detectFunc.prepare({col: col});
|
|
63
63
|
return funcCall;
|
|
64
64
|
},
|
|
@@ -75,11 +75,13 @@ category('detectorsBenchmark', () => {
|
|
|
75
75
|
});
|
|
76
76
|
}
|
|
77
77
|
|
|
78
|
-
function generate(
|
|
78
|
+
function generate(
|
|
79
|
+
notation: NOTATION, alphabet: string[], length: number, count: number, separator?: string
|
|
80
|
+
): DG.Column {
|
|
79
81
|
let seqMerger: (seqMList: string[], separator?: string) => string;
|
|
80
82
|
|
|
81
83
|
switch (notation) {
|
|
82
|
-
case
|
|
84
|
+
case NOTATION.FASTA:
|
|
83
85
|
seqMerger = (seqMList: string[]): string => {
|
|
84
86
|
let res: string = '';
|
|
85
87
|
for (let j = 0; j < seqMList.length; j++) {
|
|
@@ -89,7 +91,7 @@ category('detectorsBenchmark', () => {
|
|
|
89
91
|
return res;
|
|
90
92
|
};
|
|
91
93
|
break;
|
|
92
|
-
case
|
|
94
|
+
case NOTATION.SEPARATOR:
|
|
93
95
|
seqMerger = (seqMList: string[], separator?: string): string => {
|
|
94
96
|
return seqMList.join(separator);
|
|
95
97
|
};
|
|
@@ -114,7 +116,7 @@ category('detectorsBenchmark', () => {
|
|
|
114
116
|
return DG.Column.fromStrings('seq', seqList);
|
|
115
117
|
}
|
|
116
118
|
|
|
117
|
-
type TgtType = { semType: string, notation:
|
|
119
|
+
type TgtType = { semType: string, notation: NOTATION, alphabet: ALPHABET, separator?: string };
|
|
118
120
|
|
|
119
121
|
function testDetector(funcCall: DG.FuncCall): DG.Column {
|
|
120
122
|
//const semType: string = await grok.functions.call('Bio:detectMacromolecule', {col: col});
|
|
@@ -133,7 +135,6 @@ category('detectorsBenchmark', () => {
|
|
|
133
135
|
expect(uh.alphabet, tgt.alphabet);
|
|
134
136
|
expect(uh.separator, tgt.separator);
|
|
135
137
|
}
|
|
136
|
-
|
|
137
138
|
});
|
|
138
139
|
|
|
139
140
|
|
|
@@ -162,4 +163,3 @@ async function benchmark<TData, TRes>(
|
|
|
162
163
|
|
|
163
164
|
return resET;
|
|
164
165
|
}
|
|
165
|
-
|