@datagrok/bio 2.1.11 → 2.1.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/package-test.js +2180 -57663
- package/dist/package.js +1771 -57251
- package/dist/vendors-node_modules_datagrok-libraries_ml_src_workers_dimensionality-reducer_js.js +301 -250
- package/package.json +10 -12
- package/src/analysis/sequence-activity-cliffs.ts +5 -5
- package/src/analysis/sequence-diversity-viewer.ts +3 -2
- package/src/analysis/sequence-search-base-viewer.ts +2 -1
- package/src/analysis/sequence-similarity-viewer.ts +2 -2
- package/src/calculations/monomerLevelMols.ts +3 -3
- package/src/package.ts +25 -24
- package/src/substructure-search/substructure-search.ts +9 -9
- package/src/tests/Palettes-test.ts +9 -9
- package/src/tests/WebLogo-positions-test.ts +64 -57
- package/src/tests/bio-tests.ts +31 -21
- package/src/tests/checkInputColumn-tests.ts +17 -17
- package/src/tests/converters-test.ts +30 -30
- package/src/tests/detectors-benchmark-tests.ts +15 -16
- package/src/tests/detectors-tests.ts +31 -24
- package/src/tests/fasta-export-tests.ts +2 -3
- package/src/tests/renderers-test.ts +17 -15
- package/src/tests/splitters-test.ts +3 -3
- package/src/tests/utils/sequences-generators.ts +7 -7
- package/src/utils/cell-renderer.ts +33 -24
- package/src/utils/convert.ts +10 -10
- package/src/utils/multiple-sequence-alignment.ts +6 -7
- package/src/utils/save-as-fasta.ts +8 -8
- package/src/viewers/vd-regions-viewer.ts +15 -14
- package/tsconfig.json +1 -1
- package/jest.config.js +0 -33
- package/src/__jest__/remote.test.ts +0 -77
- package/src/__jest__/test-node.ts +0 -98
- package/test-Bio-91c83d8913ff-bb573307.html +0 -392
package/src/tests/bio-tests.ts
CHANGED
|
@@ -1,9 +1,20 @@
|
|
|
1
1
|
import * as grok from 'datagrok-api/grok';
|
|
2
2
|
import * as ui from 'datagrok-api/ui';
|
|
3
3
|
import * as DG from 'datagrok-api/dg';
|
|
4
|
-
import * as bio from '@datagrok-libraries/bio';
|
|
5
4
|
|
|
6
5
|
import {after, before, category, test, expect, expectObject} from '@datagrok-libraries/utils/src/test';
|
|
6
|
+
import {
|
|
7
|
+
AminoacidsPalettes,
|
|
8
|
+
getAlphabetSimilarity,
|
|
9
|
+
getStats,
|
|
10
|
+
monomerToShort,
|
|
11
|
+
Nucleotides,
|
|
12
|
+
NucleotidesPalettes,
|
|
13
|
+
pickUpPalette,
|
|
14
|
+
splitterAsFasta,
|
|
15
|
+
splitterAsHelm,
|
|
16
|
+
UnknownSeqPalette
|
|
17
|
+
} from '@datagrok-libraries/bio';
|
|
7
18
|
|
|
8
19
|
category('bio', () => {
|
|
9
20
|
const csvDfN1: string = `seq
|
|
@@ -53,7 +64,7 @@ PEPTIDE1{meI}$$$$
|
|
|
53
64
|
`;
|
|
54
65
|
const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
|
|
55
66
|
const seqCol: DG.Column = df.getCol('seq')!;
|
|
56
|
-
const stats =
|
|
67
|
+
const stats = getStats(seqCol, 1, splitterAsHelm);
|
|
57
68
|
|
|
58
69
|
expectObject(stats.freq, {
|
|
59
70
|
'meI': 1
|
|
@@ -72,19 +83,19 @@ PEPTIDE1{meI}$$$$
|
|
|
72
83
|
|
|
73
84
|
category('WebLogo.monomerToShort', () => {
|
|
74
85
|
test('longMonomerSingle', async () => {
|
|
75
|
-
await expect(
|
|
86
|
+
await expect(monomerToShort('S', 5), 'S');
|
|
76
87
|
});
|
|
77
88
|
test('longMonomerShort', async () => {
|
|
78
|
-
await expect(
|
|
89
|
+
await expect(monomerToShort('Short', 5), 'Short');
|
|
79
90
|
});
|
|
80
91
|
test('longMonomerLong56', async () => {
|
|
81
|
-
await expect(
|
|
92
|
+
await expect(monomerToShort('Long56', 5), 'Long5…');
|
|
82
93
|
});
|
|
83
94
|
test('longMonomerComplexFirstPartShort', async () => {
|
|
84
|
-
await expect(
|
|
95
|
+
await expect(monomerToShort('Long-long', 5), 'Long…');
|
|
85
96
|
});
|
|
86
97
|
test('longMonomerComplexFirstPartLong56', async () => {
|
|
87
|
-
await expect(
|
|
98
|
+
await expect(monomerToShort('Long56-long', 5), 'Long5…');
|
|
88
99
|
});
|
|
89
100
|
});
|
|
90
101
|
|
|
@@ -92,7 +103,7 @@ category('WebLogo.monomerToShort', () => {
|
|
|
92
103
|
export async function _testGetStats(csvDfN1: string) {
|
|
93
104
|
const dfN1: DG.DataFrame = DG.DataFrame.fromCsv(csvDfN1);
|
|
94
105
|
const seqCol: DG.Column = dfN1.col('seq')!;
|
|
95
|
-
const stats =
|
|
106
|
+
const stats = getStats(seqCol, 5, splitterAsFasta);
|
|
96
107
|
|
|
97
108
|
expectObject(stats.freq, {
|
|
98
109
|
'A': 4,
|
|
@@ -111,8 +122,8 @@ export async function _testGetAlphabetSimilarity() {
|
|
|
111
122
|
'T': 2048,
|
|
112
123
|
'-': 1000
|
|
113
124
|
};
|
|
114
|
-
const alphabet: Set<string> = new Set(Object.keys(
|
|
115
|
-
const res =
|
|
125
|
+
const alphabet: Set<string> = new Set(Object.keys(Nucleotides.Names));
|
|
126
|
+
const res = getAlphabetSimilarity(freq, alphabet);
|
|
116
127
|
|
|
117
128
|
expect(res > 0.6, true);
|
|
118
129
|
}
|
|
@@ -120,39 +131,38 @@ export async function _testGetAlphabetSimilarity() {
|
|
|
120
131
|
export async function _testPickupPaletteN1(csvDfN1: string) {
|
|
121
132
|
const df: DG.DataFrame = DG.DataFrame.fromCsv(csvDfN1);
|
|
122
133
|
const col: DG.Column = df.col('seq')!;
|
|
123
|
-
const cp =
|
|
134
|
+
const cp = pickUpPalette(col);
|
|
124
135
|
|
|
125
|
-
expect(cp instanceof
|
|
136
|
+
expect(cp instanceof NucleotidesPalettes, true);
|
|
126
137
|
}
|
|
127
138
|
|
|
128
139
|
export async function _testPickupPaletteN1e(csvDfN1e: string) {
|
|
129
140
|
const df: DG.DataFrame = DG.DataFrame.fromCsv(csvDfN1e);
|
|
130
141
|
const col: DG.Column = df.col('seq')!;
|
|
131
|
-
const cp =
|
|
142
|
+
const cp = pickUpPalette(col);
|
|
132
143
|
|
|
133
|
-
expect(cp instanceof
|
|
144
|
+
expect(cp instanceof NucleotidesPalettes, true);
|
|
134
145
|
}
|
|
135
146
|
|
|
136
147
|
export async function _testPickupPaletteAA1(csvDfAA1: string) {
|
|
137
148
|
const df: DG.DataFrame = DG.DataFrame.fromCsv(csvDfAA1);
|
|
138
149
|
const col: DG.Column = df.col('seq')!;
|
|
139
|
-
const cp =
|
|
150
|
+
const cp = pickUpPalette(col);
|
|
140
151
|
|
|
141
|
-
expect(cp instanceof
|
|
152
|
+
expect(cp instanceof AminoacidsPalettes, true);
|
|
142
153
|
}
|
|
143
154
|
|
|
144
155
|
export async function _testPickupPaletteX(csvDfX: string) {
|
|
145
156
|
const df: DG.DataFrame = DG.DataFrame.fromCsv(csvDfX);
|
|
146
157
|
const col: DG.Column = df.col('seq')!;
|
|
147
|
-
const cp =
|
|
158
|
+
const cp = pickUpPalette(col);
|
|
148
159
|
|
|
149
|
-
expect(cp instanceof
|
|
160
|
+
expect(cp instanceof UnknownSeqPalette, true);
|
|
150
161
|
}
|
|
151
162
|
|
|
152
163
|
export async function _testPickupPaletteAA2(dfAA2: DG.DataFrame) {
|
|
153
164
|
const seqCol: DG.Column = dfAA2.col('seq')!;
|
|
154
|
-
const cp =
|
|
165
|
+
const cp = pickUpPalette(seqCol);
|
|
155
166
|
|
|
156
|
-
expect(cp instanceof
|
|
167
|
+
expect(cp instanceof AminoacidsPalettes, true);
|
|
157
168
|
}
|
|
158
|
-
|
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
import * as grok from 'datagrok-api/grok';
|
|
2
2
|
import * as ui from 'datagrok-api/ui';
|
|
3
3
|
import * as DG from 'datagrok-api/dg';
|
|
4
|
-
import * as bio from '@datagrok-libraries/bio';
|
|
5
4
|
|
|
6
5
|
|
|
7
6
|
import {after, before, category, test, expect, expectArray} from '@datagrok-libraries/utils/src/test';
|
|
8
7
|
|
|
9
8
|
import {checkInputColumn, multipleSequenceAlignmentAny} from '../package';
|
|
9
|
+
import {ALPHABET, NOTATION, TAGS as bioTAGS} from '@datagrok-libraries/bio';
|
|
10
10
|
|
|
11
11
|
category('checkInputColumn', () => {
|
|
12
12
|
const csv = `seq
|
|
@@ -24,13 +24,13 @@ seq4`;
|
|
|
24
24
|
const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
|
|
25
25
|
const col: DG.Column = df.getCol('seq');
|
|
26
26
|
col.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
27
|
-
col.setTag(DG.TAGS.UNITS,
|
|
28
|
-
col.setTag(
|
|
29
|
-
col.setTag(
|
|
27
|
+
col.setTag(DG.TAGS.UNITS, NOTATION.FASTA);
|
|
28
|
+
col.setTag(bioTAGS.alphabet, ALPHABET.DNA);
|
|
29
|
+
col.setTag(bioTAGS.aligned, 'SEQ');
|
|
30
30
|
|
|
31
31
|
const [res, msg]: [boolean, string] = checkInputColumn(
|
|
32
|
-
col, 'Test', [
|
|
33
|
-
[
|
|
32
|
+
col, 'Test', [NOTATION.FASTA],
|
|
33
|
+
[ALPHABET.DNA, ALPHABET.RNA, ALPHABET.PT]);
|
|
34
34
|
|
|
35
35
|
expect(res, true);
|
|
36
36
|
});
|
|
@@ -39,13 +39,13 @@ seq4`;
|
|
|
39
39
|
const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
|
|
40
40
|
const col: DG.Column = df.getCol('seq');
|
|
41
41
|
col.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
42
|
-
col.setTag(DG.TAGS.UNITS,
|
|
42
|
+
col.setTag(DG.TAGS.UNITS, NOTATION.HELM);
|
|
43
43
|
// col.setTag(bio.TAGS.alphabetSize, '11');
|
|
44
|
-
col.setTag(
|
|
44
|
+
col.setTag(bioTAGS.alphabetIsMultichar, 'true');
|
|
45
45
|
|
|
46
46
|
const [res, msg]: [boolean, string] = checkInputColumn(
|
|
47
|
-
col, 'Test', [
|
|
48
|
-
[
|
|
47
|
+
col, 'Test', [NOTATION.FASTA],
|
|
48
|
+
[ALPHABET.DNA, ALPHABET.RNA, ALPHABET.PT]);
|
|
49
49
|
|
|
50
50
|
expect(res, false);
|
|
51
51
|
});
|
|
@@ -54,15 +54,15 @@ seq4`;
|
|
|
54
54
|
const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
|
|
55
55
|
const col: DG.Column = df.getCol('seq');
|
|
56
56
|
col.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
57
|
-
col.setTag(DG.TAGS.UNITS,
|
|
58
|
-
col.setTag(
|
|
59
|
-
col.setTag(
|
|
60
|
-
col.setTag(
|
|
61
|
-
col.setTag(
|
|
57
|
+
col.setTag(DG.TAGS.UNITS, NOTATION.FASTA);
|
|
58
|
+
col.setTag(bioTAGS.alphabet, 'UN');
|
|
59
|
+
col.setTag(bioTAGS.alphabetSize, '11');
|
|
60
|
+
col.setTag(bioTAGS.alphabetIsMultichar, 'true');
|
|
61
|
+
col.setTag(bioTAGS.aligned, 'SEQ');
|
|
62
62
|
|
|
63
63
|
const [res, msg]: [boolean, string] = checkInputColumn(
|
|
64
|
-
col, 'Test', [
|
|
65
|
-
[
|
|
64
|
+
col, 'Test', [NOTATION.FASTA],
|
|
65
|
+
[ALPHABET.DNA, ALPHABET.RNA, ALPHABET.PT]);
|
|
66
66
|
|
|
67
67
|
expect(res, false);
|
|
68
68
|
});
|
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
import * as DG from 'datagrok-api/dg';
|
|
2
2
|
import * as ui from 'datagrok-api/ui';
|
|
3
3
|
import * as grok from 'datagrok-api/grok';
|
|
4
|
-
import * as bio from '@datagrok-libraries/bio';
|
|
5
4
|
|
|
6
5
|
import {category, expect, expectArray, test} from '@datagrok-libraries/utils/src/test';
|
|
7
6
|
|
|
8
7
|
import {ConverterFunc} from './types';
|
|
8
|
+
import {NOTATION, NotationConverter, UnitsHandler} from '@datagrok-libraries/bio';
|
|
9
9
|
|
|
10
10
|
// import {mmSemType} from '../const';
|
|
11
11
|
// import {importFasta} from '../package';
|
|
@@ -132,12 +132,12 @@ RNA1{P.R(U)P.R(U)P.R(C)P.R(A)P.R(A)P.R(C)P.P.P}$$$
|
|
|
132
132
|
return _csvDfs[key];
|
|
133
133
|
};
|
|
134
134
|
|
|
135
|
-
function converter(tgtNotation:
|
|
136
|
-
if (tgtNotation ===
|
|
135
|
+
function converter(tgtNotation: NOTATION, tgtSeparator: string | null = null): ConverterFunc {
|
|
136
|
+
if (tgtNotation === NOTATION.SEPARATOR && !tgtSeparator)
|
|
137
137
|
throw new Error(`Argument 'separator' is missed for notation '${tgtNotation.toString()}'.`);
|
|
138
138
|
|
|
139
139
|
return function(srcCol: DG.Column): DG.Column {
|
|
140
|
-
const converter = new
|
|
140
|
+
const converter = new NotationConverter(srcCol);
|
|
141
141
|
const resCol = converter.convert(tgtNotation, tgtSeparator);
|
|
142
142
|
expect(resCol.getTag('units'), tgtNotation);
|
|
143
143
|
return resCol;
|
|
@@ -156,100 +156,100 @@ RNA1{P.R(U)P.R(U)P.R(C)P.R(A)P.R(A)P.R(C)P.P.P}$$$
|
|
|
156
156
|
const tgtCol: DG.Column = tgtDf.getCol('seq');
|
|
157
157
|
|
|
158
158
|
expectArray(resCol.toList(), tgtCol.toList());
|
|
159
|
-
const uh:
|
|
159
|
+
const uh: UnitsHandler = new UnitsHandler(resCol);
|
|
160
160
|
}
|
|
161
161
|
|
|
162
162
|
// FASTA tests
|
|
163
163
|
// fasta -> separator
|
|
164
164
|
test('testFastaPtToSeparator', async () => {
|
|
165
|
-
await _testConvert(Samples.fastaPt, converter(
|
|
165
|
+
await _testConvert(Samples.fastaPt, converter(NOTATION.SEPARATOR, '-'), Samples.separatorPt);
|
|
166
166
|
});
|
|
167
167
|
test('testFastaDnaToSeparator', async () => {
|
|
168
|
-
await _testConvert(Samples.fastaDna, converter(
|
|
168
|
+
await _testConvert(Samples.fastaDna, converter(NOTATION.SEPARATOR, '/'), Samples.separatorDna);
|
|
169
169
|
});
|
|
170
170
|
test('testFastaRnaToSeparator', async () => {
|
|
171
|
-
await _testConvert(Samples.fastaRna, converter(
|
|
171
|
+
await _testConvert(Samples.fastaRna, converter(NOTATION.SEPARATOR, '*'), Samples.separatorRna);
|
|
172
172
|
});
|
|
173
173
|
test('testFastaGapsToSeparator', async () => {
|
|
174
|
-
await _testConvert(Samples.fastaGaps, converter(
|
|
174
|
+
await _testConvert(Samples.fastaGaps, converter(NOTATION.SEPARATOR, '/'), Samples.separatorGaps);
|
|
175
175
|
});
|
|
176
176
|
|
|
177
177
|
// fasta -> helm
|
|
178
178
|
test('testFastaPtToHelm', async () => {
|
|
179
|
-
await _testConvert(Samples.fastaPt, converter(
|
|
179
|
+
await _testConvert(Samples.fastaPt, converter(NOTATION.HELM), Samples.helmPt);
|
|
180
180
|
});
|
|
181
181
|
test('testFastaDnaToHelm', async () => {
|
|
182
|
-
await _testConvert(Samples.fastaDna, converter(
|
|
182
|
+
await _testConvert(Samples.fastaDna, converter(NOTATION.HELM), Samples.helmDna);
|
|
183
183
|
});
|
|
184
184
|
test('testFastaRnaToHelm', async () => {
|
|
185
|
-
await _testConvert(Samples.fastaRna, converter(
|
|
185
|
+
await _testConvert(Samples.fastaRna, converter(NOTATION.HELM), Samples.helmRna);
|
|
186
186
|
});
|
|
187
187
|
test('testFastaGapsToHelm', async () => {
|
|
188
|
-
await _testConvert(Samples.fastaGaps, converter(
|
|
188
|
+
await _testConvert(Samples.fastaGaps, converter(NOTATION.HELM), Samples.helmGaps);
|
|
189
189
|
});
|
|
190
190
|
|
|
191
191
|
|
|
192
192
|
// SEPARATOR tests
|
|
193
193
|
// separator -> fasta
|
|
194
194
|
test('testSeparatorPtToFasta', async () => {
|
|
195
|
-
await _testConvert(Samples.separatorPt, converter(
|
|
195
|
+
await _testConvert(Samples.separatorPt, converter(NOTATION.FASTA), Samples.fastaPt);
|
|
196
196
|
});
|
|
197
197
|
test('testSeparatorDnaToFasta', async () => {
|
|
198
|
-
await _testConvert(Samples.separatorDna, converter(
|
|
198
|
+
await _testConvert(Samples.separatorDna, converter(NOTATION.FASTA), Samples.fastaDna);
|
|
199
199
|
});
|
|
200
200
|
test('testSeparatorRnaToFasta', async () => {
|
|
201
|
-
await _testConvert(Samples.separatorRna, converter(
|
|
201
|
+
await _testConvert(Samples.separatorRna, converter(NOTATION.FASTA), Samples.fastaRna);
|
|
202
202
|
});
|
|
203
203
|
test('testSeparatorGapsToFasta', async () => {
|
|
204
|
-
await _testConvert(Samples.separatorGaps, converter(
|
|
204
|
+
await _testConvert(Samples.separatorGaps, converter(NOTATION.FASTA), Samples.fastaGaps);
|
|
205
205
|
});
|
|
206
206
|
|
|
207
207
|
// separator -> helm
|
|
208
208
|
test('testSeparatorPtToHelm', async () => {
|
|
209
|
-
await _testConvert(Samples.separatorPt, converter(
|
|
209
|
+
await _testConvert(Samples.separatorPt, converter(NOTATION.HELM), Samples.helmPt);
|
|
210
210
|
});
|
|
211
211
|
test('testSeparatorDnaToHelm', async () => {
|
|
212
|
-
await _testConvert(Samples.separatorDna, converter(
|
|
212
|
+
await _testConvert(Samples.separatorDna, converter(NOTATION.HELM), Samples.helmDna);
|
|
213
213
|
});
|
|
214
214
|
test('testSeparatorRnaToHelm', async () => {
|
|
215
|
-
await _testConvert(Samples.separatorRna, converter(
|
|
215
|
+
await _testConvert(Samples.separatorRna, converter(NOTATION.HELM), Samples.helmRna);
|
|
216
216
|
});
|
|
217
217
|
test('testSeparatorGapsToHelm', async () => {
|
|
218
|
-
await _testConvert(Samples.separatorGaps, converter(
|
|
218
|
+
await _testConvert(Samples.separatorGaps, converter(NOTATION.HELM), Samples.helmGaps);
|
|
219
219
|
});
|
|
220
220
|
|
|
221
221
|
|
|
222
222
|
// HELM tests
|
|
223
223
|
// helm -> fasta
|
|
224
224
|
test('testHelmDnaToFasta', async () => {
|
|
225
|
-
await _testConvert(Samples.helmDna, converter(
|
|
225
|
+
await _testConvert(Samples.helmDna, converter(NOTATION.FASTA), Samples.fastaDna);
|
|
226
226
|
});
|
|
227
227
|
test('testHelmRnaToFasta', async () => {
|
|
228
|
-
await _testConvert(Samples.helmRna, converter(
|
|
228
|
+
await _testConvert(Samples.helmRna, converter(NOTATION.FASTA), Samples.fastaRna);
|
|
229
229
|
});
|
|
230
230
|
test('testHelmPtToFasta', async () => {
|
|
231
|
-
await _testConvert(Samples.helmPt, converter(
|
|
231
|
+
await _testConvert(Samples.helmPt, converter(NOTATION.FASTA), Samples.fastaPt);
|
|
232
232
|
});
|
|
233
233
|
|
|
234
234
|
// helm -> separator
|
|
235
235
|
test('testHelmDnaToSeparator', async () => {
|
|
236
|
-
await _testConvert(Samples.helmDna, converter(
|
|
236
|
+
await _testConvert(Samples.helmDna, converter(NOTATION.SEPARATOR, '/'), Samples.separatorDna);
|
|
237
237
|
});
|
|
238
238
|
test('testHelmRnaToSeparator', async () => {
|
|
239
|
-
await _testConvert(Samples.helmRna, converter(
|
|
239
|
+
await _testConvert(Samples.helmRna, converter(NOTATION.SEPARATOR, '*'), Samples.separatorRna);
|
|
240
240
|
});
|
|
241
241
|
test('testHelmPtToSeparator', async () => {
|
|
242
|
-
await _testConvert(Samples.helmPt, converter(
|
|
242
|
+
await _testConvert(Samples.helmPt, converter(NOTATION.SEPARATOR, '-'), Samples.separatorPt);
|
|
243
243
|
});
|
|
244
244
|
|
|
245
245
|
// helm miscellaneous
|
|
246
246
|
test('testHelmLoneRibose', async () => {
|
|
247
|
-
await _testConvert(Samples.helmLoneRibose, converter(
|
|
247
|
+
await _testConvert(Samples.helmLoneRibose, converter(NOTATION.FASTA), Samples.fastaRna);
|
|
248
248
|
});
|
|
249
249
|
test('testHelmLoneDeoxyribose', async () => {
|
|
250
|
-
await _testConvert(Samples.helmLoneDeoxyribose, converter(
|
|
250
|
+
await _testConvert(Samples.helmLoneDeoxyribose, converter(NOTATION.SEPARATOR, '/'), Samples.separatorDna);
|
|
251
251
|
});
|
|
252
252
|
test('testHelmLonePhosphorus', async () => {
|
|
253
|
-
await _testConvert(Samples.helmLonePhosphorus, converter(
|
|
253
|
+
await _testConvert(Samples.helmLonePhosphorus, converter(NOTATION.FASTA), Samples.fastaRna);
|
|
254
254
|
});
|
|
255
255
|
});
|
|
@@ -1,10 +1,9 @@
|
|
|
1
1
|
import * as grok from 'datagrok-api/grok';
|
|
2
2
|
import * as ui from 'datagrok-api/ui';
|
|
3
3
|
import * as DG from 'datagrok-api/dg';
|
|
4
|
-
import * as bio from '@datagrok-libraries/bio';
|
|
5
4
|
|
|
6
5
|
import {after, before, category, test, expect, expectObject} from '@datagrok-libraries/utils/src/test';
|
|
7
|
-
import {UnitsHandler} from '@datagrok-libraries/bio';
|
|
6
|
+
import {ALPHABET, getAlphabet, NOTATION, UnitsHandler} from '@datagrok-libraries/bio';
|
|
8
7
|
import {Column} from 'datagrok-api/dg';
|
|
9
8
|
|
|
10
9
|
category('detectorsBenchmark', () => {
|
|
@@ -23,42 +22,42 @@ category('detectorsBenchmark', () => {
|
|
|
23
22
|
// -- fasta --
|
|
24
23
|
|
|
25
24
|
test('fastaDnaShorts50Few50', async () => {
|
|
26
|
-
const et: number = await detectMacromoleculeBenchmark(10,
|
|
25
|
+
const et: number = await detectMacromoleculeBenchmark(10, NOTATION.FASTA, ALPHABET.DNA, 50, 50);
|
|
27
26
|
},
|
|
28
27
|
{skipReason: '#1192'});
|
|
29
28
|
|
|
30
29
|
test('fastaDnaShorts50Many1E6', async () => {
|
|
31
|
-
const et: number = await detectMacromoleculeBenchmark(10,
|
|
30
|
+
const et: number = await detectMacromoleculeBenchmark(10, NOTATION.FASTA, ALPHABET.DNA, 50, 1E6);
|
|
32
31
|
},
|
|
33
32
|
{skipReason: '#1192'});
|
|
34
33
|
|
|
35
34
|
test('fastaDnaLong1e6Few50', async () => {
|
|
36
|
-
const et: number = await detectMacromoleculeBenchmark(10,
|
|
35
|
+
const et: number = await detectMacromoleculeBenchmark(10, NOTATION.FASTA, ALPHABET.DNA, 1E6, 50);
|
|
37
36
|
},
|
|
38
37
|
{skipReason: '#1192'});
|
|
39
38
|
|
|
40
39
|
// -- separator --
|
|
41
40
|
|
|
42
41
|
test('separatorDnaShorts50Few50', async () => {
|
|
43
|
-
const et: number = await detectMacromoleculeBenchmark(10,
|
|
42
|
+
const et: number = await detectMacromoleculeBenchmark(10, NOTATION.SEPARATOR, ALPHABET.DNA, 50, 50, '/');
|
|
44
43
|
});
|
|
45
44
|
|
|
46
45
|
test('separatorDnaShorts50Many1E6', async () => {
|
|
47
|
-
const et: number = await detectMacromoleculeBenchmark(10,
|
|
46
|
+
const et: number = await detectMacromoleculeBenchmark(10, NOTATION.SEPARATOR, ALPHABET.DNA, 50, 1E6, '/');
|
|
48
47
|
},
|
|
49
48
|
{ /* skipReason: 'slow transmit large dataset to detector' */});
|
|
50
49
|
|
|
51
50
|
test('separatorDnaLong1e6Few50', async () => {
|
|
52
|
-
const et: number = await detectMacromoleculeBenchmark(10,
|
|
51
|
+
const et: number = await detectMacromoleculeBenchmark(10, NOTATION.SEPARATOR, ALPHABET.DNA, 1E6, 50, '/');
|
|
53
52
|
},
|
|
54
53
|
{skipReason: '#1192'});
|
|
55
54
|
|
|
56
55
|
async function detectMacromoleculeBenchmark(
|
|
57
|
-
maxET: number, notation:
|
|
56
|
+
maxET: number, notation: NOTATION, alphabet: ALPHABET, length: number, count: number, separator?: string
|
|
58
57
|
): Promise<number> {
|
|
59
58
|
return await benchmark<DG.FuncCall, DG.Column>(10,
|
|
60
59
|
(): DG.FuncCall => {
|
|
61
|
-
const col: DG.Column = generate(notation, [...
|
|
60
|
+
const col: DG.Column = generate(notation, [...getAlphabet(alphabet)], length, count, separator);
|
|
62
61
|
const funcCall: DG.FuncCall = detectFunc.prepare({col: col});
|
|
63
62
|
return funcCall;
|
|
64
63
|
},
|
|
@@ -75,11 +74,13 @@ category('detectorsBenchmark', () => {
|
|
|
75
74
|
});
|
|
76
75
|
}
|
|
77
76
|
|
|
78
|
-
function generate(
|
|
77
|
+
function generate(
|
|
78
|
+
notation: NOTATION, alphabet: string[], length: number, count: number, separator?: string
|
|
79
|
+
): DG.Column {
|
|
79
80
|
let seqMerger: (seqMList: string[], separator?: string) => string;
|
|
80
81
|
|
|
81
82
|
switch (notation) {
|
|
82
|
-
case
|
|
83
|
+
case NOTATION.FASTA:
|
|
83
84
|
seqMerger = (seqMList: string[]): string => {
|
|
84
85
|
let res: string = '';
|
|
85
86
|
for (let j = 0; j < seqMList.length; j++) {
|
|
@@ -89,7 +90,7 @@ category('detectorsBenchmark', () => {
|
|
|
89
90
|
return res;
|
|
90
91
|
};
|
|
91
92
|
break;
|
|
92
|
-
case
|
|
93
|
+
case NOTATION.SEPARATOR:
|
|
93
94
|
seqMerger = (seqMList: string[], separator?: string): string => {
|
|
94
95
|
return seqMList.join(separator);
|
|
95
96
|
};
|
|
@@ -114,7 +115,7 @@ category('detectorsBenchmark', () => {
|
|
|
114
115
|
return DG.Column.fromStrings('seq', seqList);
|
|
115
116
|
}
|
|
116
117
|
|
|
117
|
-
type TgtType = { semType: string, notation:
|
|
118
|
+
type TgtType = { semType: string, notation: NOTATION, alphabet: ALPHABET, separator?: string };
|
|
118
119
|
|
|
119
120
|
function testDetector(funcCall: DG.FuncCall): DG.Column {
|
|
120
121
|
//const semType: string = await grok.functions.call('Bio:detectMacromolecule', {col: col});
|
|
@@ -133,7 +134,6 @@ category('detectorsBenchmark', () => {
|
|
|
133
134
|
expect(uh.alphabet, tgt.alphabet);
|
|
134
135
|
expect(uh.separator, tgt.separator);
|
|
135
136
|
}
|
|
136
|
-
|
|
137
137
|
});
|
|
138
138
|
|
|
139
139
|
|
|
@@ -162,4 +162,3 @@ async function benchmark<TData, TRes>(
|
|
|
162
162
|
|
|
163
163
|
return resET;
|
|
164
164
|
}
|
|
165
|
-
|
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
import * as grok from 'datagrok-api/grok';
|
|
2
2
|
import * as ui from 'datagrok-api/ui';
|
|
3
3
|
import * as DG from 'datagrok-api/dg';
|
|
4
|
-
import * as bio from '@datagrok-libraries/bio';
|
|
5
4
|
|
|
6
5
|
import {after, before, category, test, expect, expectObject} from '@datagrok-libraries/utils/src/test';
|
|
7
6
|
|
|
8
7
|
import {importFasta} from '../package';
|
|
8
|
+
import {ALIGNMENT, ALPHABET, NOTATION, TAGS as bioTAGS, UnitsHandler} from '@datagrok-libraries/bio';
|
|
9
9
|
|
|
10
10
|
/*
|
|
11
11
|
// snippet to list df columns of semType='Macromolecule' (false positive)
|
|
@@ -214,49 +214,56 @@ MWRSWY-CKHP
|
|
|
214
214
|
test('NegativeSmiles', async () => { await _testNeg(readCsv('csvDfSmiles', csvDfSmiles), 'col1'); });
|
|
215
215
|
|
|
216
216
|
test('Dna1', async () => {
|
|
217
|
-
await _testPos(readCsv('csvDfDna1', csvDfDna1), 'seq',
|
|
217
|
+
await _testPos(readCsv('csvDfDna1', csvDfDna1), 'seq',
|
|
218
|
+
NOTATION.FASTA, ALIGNMENT.SEQ, ALPHABET.DNA, 4, false);
|
|
218
219
|
});
|
|
219
220
|
test('Rna1', async () => {
|
|
220
|
-
await _testPos(readCsv('csvDfRna1', csvDfRna1), 'seq',
|
|
221
|
+
await _testPos(readCsv('csvDfRna1', csvDfRna1), 'seq',
|
|
222
|
+
NOTATION.FASTA, ALIGNMENT.SEQ, ALPHABET.RNA, 4, false);
|
|
221
223
|
});
|
|
222
224
|
test('AA1', async () => {
|
|
223
|
-
await _testPos(readCsv('csvDfPt1', csvDfPt1), 'seq',
|
|
225
|
+
await _testPos(readCsv('csvDfPt1', csvDfPt1), 'seq',
|
|
226
|
+
NOTATION.FASTA, ALIGNMENT.SEQ, ALPHABET.PT, 20, false);
|
|
224
227
|
});
|
|
225
228
|
test('MsaDna1', async () => {
|
|
226
|
-
await _testPos(readCsv('csvDfMsaDna1', csvDfMsaDna1), 'seq',
|
|
229
|
+
await _testPos(readCsv('csvDfMsaDna1', csvDfMsaDna1), 'seq',
|
|
230
|
+
NOTATION.FASTA, ALIGNMENT.SEQ_MSA, ALPHABET.DNA, 4, false);
|
|
227
231
|
});
|
|
228
232
|
|
|
229
233
|
test('MsaAA1', async () => {
|
|
230
|
-
await _testPos(readCsv('csvDfMsaPt1', csvDfMsaPt1), 'seq',
|
|
231
|
-
|
|
234
|
+
await _testPos(readCsv('csvDfMsaPt1', csvDfMsaPt1), 'seq', NOTATION.FASTA,
|
|
235
|
+
ALIGNMENT.SEQ_MSA, ALPHABET.PT, 20, false);
|
|
232
236
|
});
|
|
233
237
|
|
|
234
238
|
test('SepDna', async () => {
|
|
235
|
-
await _testPos(readCsv('csvDfSepDna', csvDfSepDna), 'seq',
|
|
239
|
+
await _testPos(readCsv('csvDfSepDna', csvDfSepDna), 'seq',
|
|
240
|
+
NOTATION.SEPARATOR, ALIGNMENT.SEQ, ALPHABET.DNA, 4, false, '*');
|
|
236
241
|
});
|
|
237
242
|
test('SepRna', async () => {
|
|
238
|
-
await _testPos(readCsv('csvDfSepRna', csvDfSepRna), 'seq',
|
|
243
|
+
await _testPos(readCsv('csvDfSepRna', csvDfSepRna), 'seq',
|
|
244
|
+
NOTATION.SEPARATOR, ALIGNMENT.SEQ, ALPHABET.RNA, 4, false, '*');
|
|
239
245
|
});
|
|
240
246
|
test('SepPt', async () => {
|
|
241
247
|
await _testPos(readCsv('csvDfSepPt', csvDfSepPt), 'seq',
|
|
242
|
-
|
|
248
|
+
NOTATION.SEPARATOR, ALIGNMENT.SEQ, ALPHABET.PT, 20, false, '-');
|
|
243
249
|
});
|
|
244
250
|
test('SepUn1', async () => {
|
|
245
251
|
await _testPos(readCsv('csvDfSepUn1', csvDfSepUn1), 'seq',
|
|
246
|
-
|
|
252
|
+
NOTATION.SEPARATOR, ALIGNMENT.SEQ, ALPHABET.UN, 8, true, '-');
|
|
247
253
|
});
|
|
248
254
|
test('SepUn2', async () => {
|
|
249
255
|
await _testPos(readCsv('csvDfSepUn2', csvDfSepUn2), 'seq',
|
|
250
|
-
|
|
256
|
+
NOTATION.SEPARATOR, ALIGNMENT.SEQ, ALPHABET.UN, 9, true, '/');
|
|
251
257
|
});
|
|
252
258
|
|
|
253
259
|
test('SepMsaN1', async () => {
|
|
254
260
|
await _testPos(readCsv('csvDfSepMsaDna1', csvDfSepMsaDna1), 'seq',
|
|
255
|
-
|
|
261
|
+
NOTATION.SEPARATOR, ALIGNMENT.SEQ_MSA, ALPHABET.DNA, 4, false, '-');
|
|
256
262
|
});
|
|
257
263
|
|
|
258
264
|
test('SamplesFastaCsvPt', async () => {
|
|
259
|
-
await _testPos(readSamples(Samples.fastaCsv), 'sequence',
|
|
265
|
+
await _testPos(readSamples(Samples.fastaCsv), 'sequence',
|
|
266
|
+
NOTATION.FASTA, ALIGNMENT.SEQ, ALPHABET.PT, 20, false);
|
|
260
267
|
});
|
|
261
268
|
test('SamplesFastaCsvNegativeEntry', async () => {
|
|
262
269
|
await _testNeg(readSamples(Samples.fastaCsv), 'Entry');
|
|
@@ -270,7 +277,7 @@ MWRSWY-CKHP
|
|
|
270
277
|
|
|
271
278
|
test('SamplesFastaFastaPt', async () => {
|
|
272
279
|
await _testPos(readSamples(Samples.fastaFasta, readFileFasta),
|
|
273
|
-
'sequence',
|
|
280
|
+
'sequence', NOTATION.FASTA, ALIGNMENT.SEQ, ALPHABET.PT, 20, false);
|
|
274
281
|
});
|
|
275
282
|
|
|
276
283
|
// peptidesComplex contains monomers with spaces in AlignedSequence columns, which are forbidden
|
|
@@ -289,7 +296,7 @@ MWRSWY-CKHP
|
|
|
289
296
|
|
|
290
297
|
test('samplesMsaComplexUn', async () => {
|
|
291
298
|
await _testPos(readSamples(Samples.msaComplex), 'MSA',
|
|
292
|
-
|
|
299
|
+
NOTATION.SEPARATOR, ALIGNMENT.SEQ_MSA, ALPHABET.UN, 161, true, '/');
|
|
293
300
|
});
|
|
294
301
|
test('samplesMsaComplexNegativeActivity', async () => {
|
|
295
302
|
await _testNeg(readSamples(Samples.msaComplex), 'Activity');
|
|
@@ -304,7 +311,8 @@ MWRSWY-CKHP
|
|
|
304
311
|
});
|
|
305
312
|
|
|
306
313
|
test('samplesHelmCsvHELM', async () => {
|
|
307
|
-
await _testPos(readSamples(Samples.helmCsv), 'HELM',
|
|
314
|
+
await _testPos(readSamples(Samples.helmCsv), 'HELM',
|
|
315
|
+
NOTATION.HELM, null, null, 160, true, null);
|
|
308
316
|
});
|
|
309
317
|
|
|
310
318
|
test('samplesHelmCsvNegativeActivity', async () => {
|
|
@@ -320,7 +328,7 @@ MWRSWY-CKHP
|
|
|
320
328
|
await _testNeg(readSamples(Samples.testHelmCsv), 'Test type');
|
|
321
329
|
});
|
|
322
330
|
test('samplesTestHelmPositiveHelmString', async () => {
|
|
323
|
-
await _testPos(readSamples(Samples.testHelmCsv), 'HELM string',
|
|
331
|
+
await _testPos(readSamples(Samples.testHelmCsv), 'HELM string', NOTATION.HELM, null, null, 9, true, null);
|
|
324
332
|
});
|
|
325
333
|
test('samplesTestHelmNegativeValid', async () => {
|
|
326
334
|
await _testNeg(readSamples(Samples.testHelmCsv), 'Valid?');
|
|
@@ -357,7 +365,7 @@ MWRSWY-CKHP
|
|
|
357
365
|
|
|
358
366
|
test('samplesFastaPtPosSequence', async () => {
|
|
359
367
|
await _testPos(readSamples(Samples.fastaPtCsv), 'sequence',
|
|
360
|
-
|
|
368
|
+
NOTATION.FASTA, ALIGNMENT.SEQ, ALPHABET.PT, 20, false);
|
|
361
369
|
});
|
|
362
370
|
|
|
363
371
|
test('samplesTestCerealNegativeCerealName', async () => {
|
|
@@ -431,12 +439,12 @@ export async function _testPos(
|
|
|
431
439
|
|
|
432
440
|
expect(col.semType === DG.SEMTYPE.MACROMOLECULE, true);
|
|
433
441
|
expect(col.getTag(DG.TAGS.UNITS), units);
|
|
434
|
-
expect(col.getTag(
|
|
435
|
-
expect(col.getTag(
|
|
442
|
+
expect(col.getTag(bioTAGS.aligned), aligned);
|
|
443
|
+
expect(col.getTag(bioTAGS.alphabet), alphabet);
|
|
436
444
|
if (separator)
|
|
437
|
-
expect(col.getTag(
|
|
445
|
+
expect(col.getTag(bioTAGS.separator), separator);
|
|
438
446
|
|
|
439
|
-
const uh = new
|
|
447
|
+
const uh = new UnitsHandler(col);
|
|
440
448
|
expect(uh.getAlphabetSize(), alphabetSize);
|
|
441
449
|
expect(uh.getAlphabetIsMultichar(), alphabetIsMultichar);
|
|
442
450
|
if (!uh.isHelm()) {
|
|
@@ -444,4 +452,3 @@ export async function _testPos(
|
|
|
444
452
|
expect(uh.alphabet, alphabet);
|
|
445
453
|
}
|
|
446
454
|
}
|
|
447
|
-
|
|
@@ -1,15 +1,14 @@
|
|
|
1
1
|
import * as DG from 'datagrok-api/dg';
|
|
2
2
|
import * as ui from 'datagrok-api/ui';
|
|
3
3
|
import * as grok from 'datagrok-api/grok';
|
|
4
|
-
import * as bio from '@datagrok-libraries/bio';
|
|
5
4
|
|
|
6
5
|
import {category, expect, expectArray, test} from '@datagrok-libraries/utils/src/test';
|
|
7
6
|
import {saveAsFastaDo, wrapSequence} from '../utils/save-as-fasta';
|
|
7
|
+
import {splitterAsFasta} from '@datagrok-libraries/bio';
|
|
8
8
|
|
|
9
9
|
type SaveAsFastaTestArgs = { srcCsv: string, idCols: string [], seqCol: string, lineWidth: number, tgtFasta: string };
|
|
10
10
|
|
|
11
11
|
category('fastaExport', () => {
|
|
12
|
-
|
|
13
12
|
enum WrapDataTest {
|
|
14
13
|
single = 'single',
|
|
15
14
|
multi = 'multi'
|
|
@@ -88,7 +87,7 @@ MRGGL
|
|
|
88
87
|
});
|
|
89
88
|
|
|
90
89
|
function _testWrapSequence(testKey: string, lineWidth: number = 10) {
|
|
91
|
-
const splitter =
|
|
90
|
+
const splitter = splitterAsFasta;
|
|
92
91
|
|
|
93
92
|
const srcSeq: string = wrapData[testKey].src;
|
|
94
93
|
const wrapRes: string[] = wrapSequence(srcSeq, splitter, lineWidth);
|