@datagrok/bio 2.4.30 → 2.4.39
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.eslintrc.json +6 -8
- package/README.md +22 -7
- package/detectors.js +21 -12
- package/dist/1.js +2 -0
- package/dist/1.js.map +1 -0
- package/dist/18.js +2 -0
- package/dist/18.js.map +1 -0
- package/dist/190.js +2 -0
- package/dist/190.js.map +1 -0
- package/dist/452.js +2 -0
- package/dist/452.js.map +1 -0
- package/dist/729.js +2 -0
- package/dist/729.js.map +1 -0
- package/dist/package-test.js +1 -1
- package/dist/package-test.js.map +1 -1
- package/dist/package.js +1 -1
- package/dist/package.js.map +1 -1
- package/files/libraries/broken-lib.sdf +136 -0
- package/files/libraries/group1/mock-lib-3.json +74 -0
- package/files/libraries/mock-lib-2.json +48 -0
- package/files/tests/100_3_clustests.csv +100 -0
- package/files/tests/100_3_clustests_empty_vals.csv +100 -0
- package/files/tests/peptides_motif-with-random_10000.csv +9998 -0
- package/package.json +4 -4
- package/scripts/sequence_generator.py +185 -48
- package/src/analysis/sequence-activity-cliffs.ts +9 -11
- package/src/analysis/sequence-diversity-viewer.ts +8 -3
- package/src/analysis/sequence-search-base-viewer.ts +4 -3
- package/src/analysis/sequence-similarity-viewer.ts +13 -7
- package/src/analysis/sequence-space.ts +15 -12
- package/src/analysis/workers/mm-distance-array-service.ts +48 -0
- package/src/analysis/workers/mm-distance-array-worker.ts +29 -0
- package/src/analysis/workers/mm-distance-worker-creator.ts +6 -9
- package/src/apps/web-logo-app.ts +34 -0
- package/src/calculations/monomerLevelMols.ts +10 -12
- package/src/demo/bio01-similarity-diversity.ts +4 -5
- package/src/demo/bio01a-hierarchical-clustering-and-sequence-space.ts +6 -7
- package/src/demo/bio01b-hierarchical-clustering-and-activity-cliffs.ts +8 -8
- package/src/demo/bio03-atomic-level.ts +1 -4
- package/src/demo/bio05-helm-msa-sequence-space.ts +8 -5
- package/src/demo/utils.ts +4 -3
- package/src/package-test.ts +1 -2
- package/src/package.ts +138 -83
- package/src/seq_align.ts +482 -483
- package/src/substructure-search/substructure-search.ts +3 -3
- package/src/tests/Palettes-test.ts +1 -1
- package/src/tests/WebLogo-positions-test.ts +12 -35
- package/src/tests/_first-tests.ts +1 -1
- package/src/tests/activity-cliffs-tests.ts +10 -6
- package/src/tests/activity-cliffs-utils.ts +6 -4
- package/src/tests/bio-tests.ts +20 -25
- package/src/tests/checkInputColumn-tests.ts +5 -11
- package/src/tests/converters-test.ts +19 -37
- package/src/tests/detectors-benchmark-tests.ts +35 -37
- package/src/tests/detectors-tests.ts +29 -34
- package/src/tests/detectors-weak-and-likely-tests.ts +11 -21
- package/src/tests/fasta-export-tests.ts +3 -3
- package/src/tests/fasta-handler-test.ts +2 -3
- package/src/tests/lib-tests.ts +2 -4
- package/src/tests/mm-distance-tests.ts +25 -17
- package/src/tests/monomer-libraries-tests.ts +1 -1
- package/src/tests/msa-tests.ts +12 -9
- package/src/tests/pepsea-tests.ts +6 -3
- package/src/tests/renderers-test.ts +13 -11
- package/src/tests/sequence-space-test.ts +10 -7
- package/src/tests/sequence-space-utils.ts +7 -3
- package/src/tests/similarity-diversity-tests.ts +47 -61
- package/src/tests/splitters-test.ts +14 -20
- package/src/tests/to-atomic-level-tests.ts +9 -17
- package/src/tests/units-handler-splitted-tests.ts +106 -0
- package/src/tests/units-handler-tests.ts +22 -26
- package/src/tests/utils/sequences-generators.ts +6 -2
- package/src/tests/utils.ts +10 -4
- package/src/tests/viewers.ts +1 -1
- package/src/utils/atomic-works.ts +49 -57
- package/src/utils/cell-renderer.ts +25 -8
- package/src/utils/check-input-column.ts +19 -4
- package/src/utils/constants.ts +3 -3
- package/src/utils/convert.ts +56 -23
- package/src/utils/monomer-lib.ts +83 -64
- package/src/utils/multiple-sequence-alignment-ui.ts +24 -21
- package/src/utils/multiple-sequence-alignment.ts +2 -2
- package/src/utils/pepsea.ts +17 -7
- package/src/utils/save-as-fasta.ts +11 -4
- package/src/utils/ui-utils.ts +1 -1
- package/src/viewers/vd-regions-viewer.ts +21 -22
- package/src/viewers/web-logo-viewer.ts +189 -154
- package/src/widgets/bio-substructure-filter.ts +9 -6
- package/src/widgets/representations.ts +11 -12
- package/tsconfig.json +1 -1
- package/dist/258.js +0 -2
- package/dist/258.js.map +0 -1
- package/dist/562.js +0 -2
- package/dist/562.js.map +0 -1
- package/dist/705.js +0 -2
- package/dist/705.js.map +0 -1
- package/dist/925.js +0 -2
- package/dist/925.js.map +0 -1
- package/src/analysis/workers/mm-distance-worker.ts +0 -16
|
@@ -2,13 +2,11 @@ import * as grok from 'datagrok-api/grok';
|
|
|
2
2
|
import * as ui from 'datagrok-api/ui';
|
|
3
3
|
import * as DG from 'datagrok-api/dg';
|
|
4
4
|
|
|
5
|
-
import {
|
|
5
|
+
import {before, category, test, expect} from '@datagrok-libraries/utils/src/test';
|
|
6
6
|
import {ALPHABET, getAlphabet, NOTATION} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
7
|
-
import {Column} from 'datagrok-api/dg';
|
|
8
7
|
import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
9
8
|
|
|
10
9
|
category('detectorsBenchmark', () => {
|
|
11
|
-
|
|
12
10
|
let detectFunc: DG.Func;
|
|
13
11
|
|
|
14
12
|
before(async () => {
|
|
@@ -23,38 +21,38 @@ category('detectorsBenchmark', () => {
|
|
|
23
21
|
// -- fasta --
|
|
24
22
|
|
|
25
23
|
test('fastaDnaShorts50Few50', async () => {
|
|
26
|
-
|
|
24
|
+
await detectMacromoleculeBenchmark(10, NOTATION.FASTA, ALPHABET.DNA, 50, 50);
|
|
27
25
|
},
|
|
28
26
|
{skipReason: '#1192'});
|
|
29
27
|
|
|
30
28
|
test('fastaDnaShorts50Many1E6', async () => {
|
|
31
|
-
|
|
29
|
+
await detectMacromoleculeBenchmark(10, NOTATION.FASTA, ALPHABET.DNA, 50, 1E6);
|
|
32
30
|
},
|
|
33
31
|
{skipReason: '#1192'});
|
|
34
32
|
|
|
35
33
|
test('fastaDnaLong1e6Few50', async () => {
|
|
36
|
-
|
|
34
|
+
await detectMacromoleculeBenchmark(10, NOTATION.FASTA, ALPHABET.DNA, 1E6, 50);
|
|
37
35
|
},
|
|
38
36
|
{skipReason: '#1192'});
|
|
39
37
|
|
|
40
38
|
// -- separator --
|
|
41
39
|
|
|
42
40
|
test('separatorDnaShorts50Few50', async () => {
|
|
43
|
-
|
|
41
|
+
detectMacromoleculeBenchmark(10, NOTATION.SEPARATOR, ALPHABET.DNA, 50, 50, '/');
|
|
44
42
|
}, {skipReason: '#1192'});
|
|
45
43
|
|
|
46
44
|
test('separatorDnaShorts50Many1E6', async () => {
|
|
47
|
-
|
|
45
|
+
detectMacromoleculeBenchmark(10, NOTATION.SEPARATOR, ALPHABET.DNA, 50, 1E6, '/');
|
|
48
46
|
},
|
|
49
47
|
{ /* skipReason: 'slow transmit large dataset to detector' */});
|
|
50
48
|
|
|
51
49
|
test('separatorDnaLong1e6Few50', async () => {
|
|
52
|
-
|
|
50
|
+
detectMacromoleculeBenchmark(10, NOTATION.SEPARATOR, ALPHABET.DNA, 1E6, 50, '/');
|
|
53
51
|
},
|
|
54
52
|
{skipReason: '#1192'});
|
|
55
53
|
|
|
56
54
|
async function detectMacromoleculeBenchmark(
|
|
57
|
-
maxET: number, notation: NOTATION, alphabet: ALPHABET, length: number, count: number, separator?: string
|
|
55
|
+
maxET: number, notation: NOTATION, alphabet: ALPHABET, length: number, count: number, separator?: string,
|
|
58
56
|
): Promise<number> {
|
|
59
57
|
return await benchmark<DG.FuncCall, DG.Column>(10,
|
|
60
58
|
(): DG.FuncCall => {
|
|
@@ -70,48 +68,48 @@ category('detectorsBenchmark', () => {
|
|
|
70
68
|
semType: DG.SEMTYPE.MACROMOLECULE,
|
|
71
69
|
notation: notation,
|
|
72
70
|
alphabet: alphabet,
|
|
73
|
-
separator: separator
|
|
71
|
+
separator: separator,
|
|
74
72
|
});
|
|
75
73
|
});
|
|
76
74
|
}
|
|
77
75
|
|
|
78
76
|
function generate(
|
|
79
|
-
notation: NOTATION, alphabet: string[], length: number, count: number, separator?: string
|
|
77
|
+
notation: NOTATION, alphabet: string[], length: number, count: number, separator?: string,
|
|
80
78
|
): DG.Column {
|
|
81
79
|
let seqMerger: (seqMList: string[], separator?: string) => string;
|
|
82
80
|
|
|
83
81
|
switch (notation) {
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
82
|
+
case NOTATION.FASTA:
|
|
83
|
+
seqMerger = (seqMList: string[]): string => {
|
|
84
|
+
let res: string = '';
|
|
85
|
+
for (let j = 0; j < seqMList.length; j++) {
|
|
86
|
+
const m = seqMList[j];
|
|
87
|
+
res += m.length == 1 ? m : `[${m}]`;
|
|
88
|
+
}
|
|
89
|
+
return res;
|
|
90
|
+
};
|
|
91
|
+
break;
|
|
92
|
+
case NOTATION.SEPARATOR:
|
|
93
|
+
seqMerger = (seqMList: string[], separator?: string): string => {
|
|
94
|
+
return seqMList.join(separator);
|
|
95
|
+
};
|
|
96
|
+
break;
|
|
97
|
+
default:
|
|
98
|
+
throw new Error(`Not supported notation '${notation}'.`);
|
|
101
99
|
}
|
|
102
100
|
|
|
103
101
|
const buildSeq = (alphabet: string[], length: number): string => {
|
|
104
102
|
const seqMList = new Array<string>(length);
|
|
105
|
-
for (let j = 0; j < length; j++)
|
|
103
|
+
for (let j = 0; j < length; j++)
|
|
106
104
|
seqMList[j] = alphabet[Math.floor(Math.random() * alphabet.length)];
|
|
107
|
-
|
|
105
|
+
|
|
108
106
|
return seqMerger(seqMList, separator);
|
|
109
107
|
};
|
|
110
108
|
|
|
111
109
|
const seqList: string[] = Array(count);
|
|
112
|
-
for (let i = 0; i < count; i++)
|
|
110
|
+
for (let i = 0; i < count; i++)
|
|
113
111
|
seqList[i] = buildSeq(alphabet, length);
|
|
114
|
-
|
|
112
|
+
|
|
115
113
|
|
|
116
114
|
return DG.Column.fromStrings('seq', seqList);
|
|
117
115
|
}
|
|
@@ -123,13 +121,13 @@ category('detectorsBenchmark', () => {
|
|
|
123
121
|
funcCall.callSync();
|
|
124
122
|
const semType = funcCall.getOutputParamValue();
|
|
125
123
|
|
|
126
|
-
const col: DG.Column = funcCall.inputs.col;
|
|
124
|
+
const col: DG.Column = funcCall.inputs.col as unknown as DG.Column;
|
|
127
125
|
if (semType) col.semType = semType;
|
|
128
126
|
return col;
|
|
129
127
|
}
|
|
130
128
|
|
|
131
129
|
function checkDetectorRes(col: DG.Column, tgt: TgtType): void {
|
|
132
|
-
const uh =
|
|
130
|
+
const uh = UnitsHandler.getOrCreate(col);
|
|
133
131
|
expect(col.semType, tgt.semType);
|
|
134
132
|
expect(uh.notation, tgt.notation);
|
|
135
133
|
expect(uh.alphabet, tgt.alphabet);
|
|
@@ -138,9 +136,9 @@ category('detectorsBenchmark', () => {
|
|
|
138
136
|
});
|
|
139
137
|
|
|
140
138
|
|
|
141
|
-
|
|
139
|
+
//Returns ET [ms] of test()
|
|
142
140
|
async function benchmark<TData, TRes>(
|
|
143
|
-
maxET: number, prepare: () => TData, test: (data: TData) => Promise<TRes>, check: (res: TRes) => void
|
|
141
|
+
maxET: number, prepare: () => TData, test: (data: TData) => Promise<TRes>, check: (res: TRes) => void,
|
|
144
142
|
): Promise<number> {
|
|
145
143
|
const data: TData = prepare();
|
|
146
144
|
|
|
@@ -2,7 +2,7 @@ import * as grok from 'datagrok-api/grok';
|
|
|
2
2
|
import * as ui from 'datagrok-api/ui';
|
|
3
3
|
import * as DG from 'datagrok-api/dg';
|
|
4
4
|
|
|
5
|
-
import {
|
|
5
|
+
import {category, test, expect} from '@datagrok-libraries/utils/src/test';
|
|
6
6
|
|
|
7
7
|
import {importFasta} from '../package';
|
|
8
8
|
import {ALIGNMENT, ALPHABET, NOTATION, TAGS as bioTAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
@@ -67,69 +67,56 @@ category('detectors', () => {
|
|
|
67
67
|
[csvTests.negSmiles]: string = `col1
|
|
68
68
|
CCCCN1C(=O)CN=C(c2cc(F)ccc12)C3CCCCC3
|
|
69
69
|
C1CCCCC1
|
|
70
|
-
CCCCCC
|
|
71
|
-
`;
|
|
70
|
+
CCCCCC`;
|
|
72
71
|
[csvTests.fastaDna1]: string = `seq
|
|
73
72
|
ACGTC
|
|
74
73
|
CAGTGT
|
|
75
|
-
TTCAAC
|
|
76
|
-
`;
|
|
74
|
+
TTCAAC`;
|
|
77
75
|
[csvTests.fastaRna1]: string = `seq
|
|
78
76
|
ACGUC
|
|
79
77
|
CAGUGU
|
|
80
|
-
UUCAAC
|
|
81
|
-
`;
|
|
78
|
+
UUCAAC`;
|
|
82
79
|
/** Pure amino acids sequence */
|
|
83
80
|
[csvTests.fastaPt1]: string = `seq
|
|
84
81
|
FWPHEY
|
|
85
82
|
YNRQWYV
|
|
86
|
-
MKPSEYV
|
|
87
|
-
`;
|
|
83
|
+
MKPSEYV`;
|
|
88
84
|
[csvTests.fastaUn]: string = `seq
|
|
89
85
|
[meI][hHis][Aca]NT[dE][Thr_PO3H2][Aca]D
|
|
90
86
|
[meI][hHis][Aca][Cys_SEt]T[dK][Thr_PO3H2][Aca][Tyr_PO3H2]
|
|
91
|
-
[Lys_Boc][hHis][Aca][Cys_SEt]T[dK][Thr_PO3H2][Aca][Tyr_PO3H2]
|
|
92
|
-
`;
|
|
87
|
+
[Lys_Boc][hHis][Aca][Cys_SEt]T[dK][Thr_PO3H2][Aca][Tyr_PO3H2]`;
|
|
93
88
|
[csvTests.sepDna]: string = `seq
|
|
94
89
|
A*C*G*T*C
|
|
95
90
|
C*A*G*T*G*T
|
|
96
|
-
T*T*C*A*A*C
|
|
97
|
-
`;
|
|
91
|
+
T*T*C*A*A*C`;
|
|
98
92
|
[csvTests.sepRna]: string = `seq
|
|
99
93
|
A*C*G*U*C
|
|
100
94
|
C*A*G*U*G*U
|
|
101
|
-
U*U*C*A*A*C
|
|
102
|
-
`;
|
|
95
|
+
U*U*C*A*A*C`;
|
|
103
96
|
[csvTests.sepPt]: string = `seq
|
|
104
97
|
F-W-P-H-E-Y
|
|
105
98
|
Y-N-R-Q-W-Y-V
|
|
106
|
-
M-K-P-S-E-Y-V
|
|
107
|
-
`;
|
|
99
|
+
M-K-P-S-E-Y-V`;
|
|
108
100
|
[csvTests.sepUn1]: string = `seq
|
|
109
101
|
abc-dfgg-abc1-cfr3-rty-wert
|
|
110
102
|
rut12-her2-rty-wert-abc-abc1-dfgg
|
|
111
|
-
rut12-rty-her2-abc-cfr3-wert-rut12
|
|
112
|
-
`;
|
|
103
|
+
rut12-rty-her2-abc-cfr3-wert-rut12`;
|
|
113
104
|
[csvTests.sepUn2]: string = `seq
|
|
114
105
|
abc/dfgg/abc1/cfr3/rty/wert
|
|
115
106
|
rut12/her2/rty/wert//abc/abc1/dfgg
|
|
116
|
-
rut12/rty/her2/abc/cfr3//wert/rut12
|
|
117
|
-
`;
|
|
107
|
+
rut12/rty/her2/abc/cfr3//wert/rut12`;
|
|
118
108
|
[csvTests.sepMsaDna1]: string = `seq
|
|
119
109
|
A-C--G-T--C-T
|
|
120
110
|
C-A-C--T--G-T
|
|
121
|
-
A-C-C-G-T-A-C-T
|
|
122
|
-
`;
|
|
111
|
+
A-C-C-G-T-A-C-T`;
|
|
123
112
|
[csvTests.fastaMsaDna1]: string = `seq
|
|
124
113
|
AC-GT-CT
|
|
125
114
|
CAC-T-GT
|
|
126
|
-
ACCGTACT
|
|
127
|
-
`;
|
|
115
|
+
ACCGTACT`;
|
|
128
116
|
[csvTests.fastaMsaPt1]: string = `seq
|
|
129
117
|
FWR-WYV-KHP
|
|
130
118
|
YNR-WYV-KHP
|
|
131
|
-
MWRSWY-CKHP
|
|
132
|
-
`;
|
|
119
|
+
MWRSWY-CKHP`;
|
|
133
120
|
}();
|
|
134
121
|
|
|
135
122
|
const enum Samples {
|
|
@@ -201,7 +188,7 @@ MWRSWY-CKHP
|
|
|
201
188
|
return df;
|
|
202
189
|
}
|
|
203
190
|
|
|
204
|
-
async function
|
|
191
|
+
async function _readFileFasta(file: string): Promise<DG.DataFrame> {
|
|
205
192
|
const txt: string = await grok.dapi.files.readAsText(file);
|
|
206
193
|
const df: DG.DataFrame = importFasta(txt)[0];
|
|
207
194
|
return df;
|
|
@@ -223,6 +210,8 @@ MWRSWY-CKHP
|
|
|
223
210
|
test('Negative2', async () => { await _testNeg(readCsv(csvTests.neg2), 'col1'); });
|
|
224
211
|
test('Negative3', async () => { await _testNeg(readCsv(csvTests.neg3), 'col1'); });
|
|
225
212
|
test('NegativeSmiles', async () => { await _testNeg(readCsv(csvTests.negSmiles), 'col1'); });
|
|
213
|
+
test('NegativeStartEnd', async () => { await _testNegList(['START', 'END']); });
|
|
214
|
+
test('NegativeStartEndIntermediate', async () => { await _testNegList(['START', 'END', 'INTERMEDIATE']); });
|
|
226
215
|
|
|
227
216
|
test('FastaDna1', async () => {
|
|
228
217
|
await _testPos(readCsv(csvTests.fastaDna1), 'seq',
|
|
@@ -375,6 +364,15 @@ MWRSWY-CKHP
|
|
|
375
364
|
});
|
|
376
365
|
});
|
|
377
366
|
|
|
367
|
+
export async function _testNegList(list: string[]): Promise<void> {
|
|
368
|
+
const col: DG.Column = DG.Column.fromList(DG.TYPE.STRING, 'col1', list);
|
|
369
|
+
const semType: string = await grok.functions.call('Bio:detectMacromolecule', {col: col});
|
|
370
|
+
if (col.semType === DG.SEMTYPE.MACROMOLECULE) {
|
|
371
|
+
const msg = `Negative test detected semType='${col.semType}', units='${col.getTag(DG.TAGS.UNITS)}'.`;
|
|
372
|
+
throw new Error(msg);
|
|
373
|
+
}
|
|
374
|
+
}
|
|
375
|
+
|
|
378
376
|
export async function _testNeg(readDf: DfReaderFunc, colName: string) {
|
|
379
377
|
const df: DG.DataFrame = await readDf();
|
|
380
378
|
const col: DG.Column = df.getCol(colName)!;
|
|
@@ -386,16 +384,13 @@ export async function _testNeg(readDf: DfReaderFunc, colName: string) {
|
|
|
386
384
|
if (col.semType === DG.SEMTYPE.MACROMOLECULE) {
|
|
387
385
|
const msg = `Negative test detected semType='${col.semType}', units='${col.getTag(DG.TAGS.UNITS)}'.`;
|
|
388
386
|
throw new Error(msg);
|
|
389
|
-
// col.semType = '';
|
|
390
|
-
// col.setTag(DG.TAGS.UNITS, '');
|
|
391
|
-
// col.setTag(NOTATION.SEPARATOR, '');
|
|
392
387
|
}
|
|
393
388
|
}
|
|
394
389
|
|
|
395
390
|
export async function _testPos(
|
|
396
391
|
readDf: DfReaderFunc, colName: string, units: string,
|
|
397
392
|
aligned: string | null, alphabet: string | null, alphabetSize: number, alphabetIsMultichar: boolean,
|
|
398
|
-
separator: string | null = null
|
|
393
|
+
separator: string | null = null,
|
|
399
394
|
) {
|
|
400
395
|
const df: DG.DataFrame = await readDf();
|
|
401
396
|
const col: DG.Column = df.col(colName)!;
|
|
@@ -411,7 +406,7 @@ export async function _testPos(
|
|
|
411
406
|
if (separator)
|
|
412
407
|
expect(col.getTag(bioTAGS.separator), separator);
|
|
413
408
|
|
|
414
|
-
const uh =
|
|
409
|
+
const uh = UnitsHandler.getOrCreate(col);
|
|
415
410
|
expect(uh.getAlphabetSize(), alphabetSize);
|
|
416
411
|
expect(uh.getAlphabetIsMultichar(), alphabetIsMultichar);
|
|
417
412
|
if (!uh.isHelm()) {
|
|
@@ -427,7 +422,7 @@ class PosCol {
|
|
|
427
422
|
public readonly alphabet: string | null,
|
|
428
423
|
public readonly alphabetSize: number,
|
|
429
424
|
public readonly alphabetIsMultichar: boolean,
|
|
430
|
-
public readonly separator?: string
|
|
425
|
+
public readonly separator?: string,
|
|
431
426
|
) { };
|
|
432
427
|
}
|
|
433
428
|
|
|
@@ -2,7 +2,7 @@ import * as grok from 'datagrok-api/grok';
|
|
|
2
2
|
import * as ui from 'datagrok-api/ui';
|
|
3
3
|
import * as DG from 'datagrok-api/dg';
|
|
4
4
|
|
|
5
|
-
import {
|
|
5
|
+
import {category, test} from '@datagrok-libraries/utils/src/test';
|
|
6
6
|
import {ALIGNMENT, ALPHABET, NOTATION} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
7
7
|
import {_testNeg, _testPos} from './detectors-tests';
|
|
8
8
|
import {DfReaderFunc} from './types';
|
|
@@ -31,61 +31,51 @@ category('detectors:weak-and-likely', () => {
|
|
|
31
31
|
1,TTTTT
|
|
32
32
|
2,TTTTT
|
|
33
33
|
3,TTTTT
|
|
34
|
-
4,TTTTT
|
|
35
|
-
`,
|
|
34
|
+
4,TTTTT`,
|
|
36
35
|
[csvTests.fastaDnaWeak1LikelyName]: `id,seq
|
|
37
36
|
1,TTTTT
|
|
38
37
|
2,TTTTT
|
|
39
38
|
3,TTTTT
|
|
40
|
-
4,TTTTT
|
|
41
|
-
`,
|
|
39
|
+
4,TTTTT`,
|
|
42
40
|
[csvTests.fastaRnaWeak1]: `id,colName
|
|
43
41
|
1,UUUUU
|
|
44
42
|
2,UUUUU
|
|
45
43
|
3,UUUUU
|
|
46
|
-
4,UUUUU
|
|
47
|
-
`,
|
|
44
|
+
4,UUUUU`,
|
|
48
45
|
[csvTests.fastaRnaWeak1LikelyName]: `id,seq
|
|
49
46
|
1,UUUUU
|
|
50
47
|
2,UUUUU
|
|
51
48
|
3,UUUUU
|
|
52
|
-
4,UUUUU
|
|
53
|
-
`,
|
|
49
|
+
4,UUUUU`,
|
|
54
50
|
[csvTests.fastaPtWeak1]: `id,colName
|
|
55
51
|
1,SLSLSPGK
|
|
56
52
|
2,SLSLSPGK
|
|
57
53
|
3,SLSLSPGK
|
|
58
|
-
4,SLSLSPGK
|
|
59
|
-
`,
|
|
54
|
+
4,SLSLSPGK`,
|
|
60
55
|
[csvTests.fastaPtWeak1LikelyName]: `id,seq
|
|
61
56
|
1,SLSLSPGK
|
|
62
57
|
2,SLSLSPGK
|
|
63
58
|
3,SLSLSPGK
|
|
64
|
-
4,SLSLSPGK
|
|
65
|
-
`,
|
|
59
|
+
4,SLSLSPGK`,
|
|
66
60
|
[csvTests.fastaUn1]: `id,colName
|
|
67
61
|
1,word
|
|
68
62
|
2,other
|
|
69
63
|
3,some
|
|
70
|
-
4,another
|
|
71
|
-
`,
|
|
64
|
+
4,another`,
|
|
72
65
|
[csvTests.fastaUn1LikelyName]: `id,seq
|
|
73
66
|
1,word
|
|
74
67
|
2,other
|
|
75
68
|
3,some
|
|
76
|
-
4,another
|
|
77
|
-
`,
|
|
69
|
+
4,another`,
|
|
78
70
|
[csvTests.fastaUn2LikelyName]: `protein
|
|
79
71
|
Boombastic
|
|
80
72
|
Megafantastic
|
|
81
|
-
"just-a-random-thought,oy!"
|
|
82
|
-
`,
|
|
73
|
+
"just-a-random-thought,oy!"`,
|
|
83
74
|
[csvTests.fastaUnMsa1LikelyName]: `id,seq
|
|
84
75
|
1,word
|
|
85
76
|
2,male
|
|
86
77
|
3,bare
|
|
87
|
-
4,core
|
|
88
|
-
`,
|
|
78
|
+
4,core`,
|
|
89
79
|
};
|
|
90
80
|
|
|
91
81
|
const readCsv: (key: csvTests) => DfReaderFunc = (key: keyof typeof csvData) => {
|
|
@@ -47,7 +47,7 @@ MDYKETLLMP
|
|
|
47
47
|
KTDFPMRGGL
|
|
48
48
|
>3
|
|
49
49
|
P
|
|
50
|
-
|
|
50
|
+
`,
|
|
51
51
|
},
|
|
52
52
|
[SaveAsFastaTests.test2]: {
|
|
53
53
|
srcCsv: `id,id2,seq
|
|
@@ -66,8 +66,8 @@ KTDFP
|
|
|
66
66
|
MRGGL
|
|
67
67
|
>seqC|3
|
|
68
68
|
[MeA]
|
|
69
|
-
|
|
70
|
-
}
|
|
69
|
+
`,
|
|
70
|
+
},
|
|
71
71
|
};
|
|
72
72
|
|
|
73
73
|
test('wrapSequenceSingle', async () => {
|
|
@@ -5,7 +5,6 @@ import * as DG from 'datagrok-api/dg';
|
|
|
5
5
|
|
|
6
6
|
import {category, expectArray, test} from '@datagrok-libraries/utils/src/test';
|
|
7
7
|
import {FastaFileHandler} from '@datagrok-libraries/bio/src/utils/fasta-handler';
|
|
8
|
-
import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
9
8
|
|
|
10
9
|
|
|
11
10
|
category('fastaFileHandler', () => {
|
|
@@ -71,7 +70,7 @@ YHSPFHN
|
|
|
71
70
|
const descriptionsArray = [
|
|
72
71
|
'description:1', 'description:2', 'description:3', 'description:4',
|
|
73
72
|
];
|
|
74
|
-
const
|
|
73
|
+
const _descriptionCol = DG.Column.fromStrings('description', descriptionsArray);
|
|
75
74
|
|
|
76
75
|
const sequencesArray = [
|
|
77
76
|
'MDYKETLLMPKTDFPMRGGLPNKEPQIQEKW',
|
|
@@ -86,7 +85,7 @@ YHSPFHN
|
|
|
86
85
|
const parsedSequencesArray = ffh.sequencesArray;
|
|
87
86
|
expectArray(
|
|
88
87
|
[parsedDescriptionsArray, parsedSequencesArray],
|
|
89
|
-
[descriptionsArray, sequencesArray]
|
|
88
|
+
[descriptionsArray, sequencesArray],
|
|
90
89
|
);
|
|
91
90
|
}
|
|
92
91
|
|
package/src/tests/lib-tests.ts
CHANGED
|
@@ -3,11 +3,9 @@ import * as grok from 'datagrok-api/grok';
|
|
|
3
3
|
import * as ui from 'datagrok-api/ui';
|
|
4
4
|
import * as DG from 'datagrok-api/dg';
|
|
5
5
|
|
|
6
|
-
import {category
|
|
7
|
-
import {FastaFileHandler} from '@datagrok-libraries/bio/src/utils/fasta-handler';
|
|
8
|
-
import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
6
|
+
import {category} from '@datagrok-libraries/utils/src/test';
|
|
9
7
|
|
|
10
|
-
category('monomer lib', () => {
|
|
8
|
+
category('monomer lib', () => {
|
|
11
9
|
// test('monomerManager', async() => {
|
|
12
10
|
// const df: DG.DataFrame = DG.DataFrame.fromCsv(await _package.files.readAsText('tests/test.csv'));
|
|
13
11
|
// grok.shell.addTableView(df);
|
|
@@ -60,63 +60,67 @@ category('Distance', async () => {
|
|
|
60
60
|
|
|
61
61
|
test('levenstein-sub', async () => {
|
|
62
62
|
const df = mmDistanceFunctions[MmDistanceFunctionsNames.LEVENSHTEIN]();
|
|
63
|
-
_testDistance(prot1, prot2, df,
|
|
63
|
+
_testDistance(prot1, prot2, df, 0.2);
|
|
64
64
|
});
|
|
65
65
|
test('levenstein-del', async () => {
|
|
66
66
|
const df = mmDistanceFunctions[MmDistanceFunctionsNames.LEVENSHTEIN]();
|
|
67
|
-
_testDistance(prot3, prot4, df,
|
|
67
|
+
_testDistance(prot3, prot4, df, 0.4);
|
|
68
68
|
});
|
|
69
69
|
|
|
70
70
|
test('hamming', async () => {
|
|
71
71
|
const df = mmDistanceFunctions[MmDistanceFunctionsNames.HAMMING]();
|
|
72
|
-
_testDistance(prot3, prot4, df,
|
|
72
|
+
_testDistance(prot3, prot4, df, 0.6);
|
|
73
73
|
});
|
|
74
74
|
|
|
75
75
|
// Note that here the result is actually an inverted value of alignment score, which is correlated with distance
|
|
76
76
|
// tests using default BLOSUM62 matrix are in agreement with the results of the online tool
|
|
77
77
|
test('needleman-blosum62', async () => {
|
|
78
78
|
const df = mmDistanceFunctions[MmDistanceFunctionsNames.NEEDLEMANN_WUNSCH]();
|
|
79
|
-
_testDistance(prot1, prot2, df,
|
|
79
|
+
_testDistance(prot1, prot2, df, 0.205);
|
|
80
80
|
});
|
|
81
81
|
|
|
82
82
|
test('needleman-blosum62-del', async () => {
|
|
83
83
|
const df = mmDistanceFunctions[MmDistanceFunctionsNames.NEEDLEMANN_WUNSCH]();
|
|
84
|
-
_testDistance(prot3, prot4, df,
|
|
84
|
+
_testDistance(prot3, prot4, df, 0.65);
|
|
85
85
|
});
|
|
86
86
|
|
|
87
87
|
test('needleman-custom-sub', async () => {
|
|
88
88
|
const df = mmDistanceFunctions[MmDistanceFunctionsNames.NEEDLEMANN_WUNSCH](
|
|
89
|
-
{scoringMatrix, alphabetIndexes, gapOpen: 1, gapExtend: 1}
|
|
89
|
+
{scoringMatrix, alphabetIndexes, gapOpen: 1, gapExtend: 1},
|
|
90
90
|
);
|
|
91
|
-
_testDistance(prot1, prot2, df,
|
|
91
|
+
_testDistance(prot1, prot2, df, 0.2);
|
|
92
92
|
});
|
|
93
93
|
|
|
94
94
|
test('needleman-custom-del', async () => {
|
|
95
95
|
const df = mmDistanceFunctions[MmDistanceFunctionsNames.NEEDLEMANN_WUNSCH](
|
|
96
|
-
{scoringMatrix, alphabetIndexes, gapOpen: 1, gapExtend: 1}
|
|
96
|
+
{scoringMatrix, alphabetIndexes, gapOpen: 1, gapExtend: 1},
|
|
97
97
|
);
|
|
98
|
-
_testDistance(prot3, prot4, df,
|
|
98
|
+
_testDistance(prot3, prot4, df, 0.8);
|
|
99
99
|
});
|
|
100
100
|
|
|
101
101
|
test('needleman-custom-zero-extend', async () => {
|
|
102
102
|
const df = mmDistanceFunctions[MmDistanceFunctionsNames.NEEDLEMANN_WUNSCH](
|
|
103
|
-
{scoringMatrix, alphabetIndexes, gapOpen: 1, gapExtend: 0}
|
|
103
|
+
{scoringMatrix, alphabetIndexes, gapOpen: 1, gapExtend: 0},
|
|
104
104
|
);
|
|
105
|
-
_testDistance(prot5, prot6, df,
|
|
105
|
+
_testDistance(prot5, prot6, df, 0.714);
|
|
106
106
|
});
|
|
107
107
|
|
|
108
108
|
test('needleman-custom-half-extend', async () => {
|
|
109
109
|
const df = mmDistanceFunctions[MmDistanceFunctionsNames.NEEDLEMANN_WUNSCH](
|
|
110
|
-
{scoringMatrix, alphabetIndexes, gapOpen: 2, gapExtend: 1}
|
|
110
|
+
{scoringMatrix, alphabetIndexes, gapOpen: 2, gapExtend: 1},
|
|
111
111
|
);
|
|
112
|
-
_testDistance(prot5, prot6, df,
|
|
112
|
+
_testDistance(prot5, prot6, df, 1.286);
|
|
113
113
|
});
|
|
114
114
|
|
|
115
115
|
test('needleman-custom-same-extend', async () => {
|
|
116
116
|
const df = mmDistanceFunctions[MmDistanceFunctionsNames.NEEDLEMANN_WUNSCH](
|
|
117
|
-
{scoringMatrix, alphabetIndexes, gapOpen: 1, gapExtend: 1}
|
|
117
|
+
{scoringMatrix, alphabetIndexes, gapOpen: 1, gapExtend: 1},
|
|
118
118
|
);
|
|
119
|
-
|
|
119
|
+
if (DG.Test.isInBenchmark) {
|
|
120
|
+
const seq1 = Array(10000).fill('FWRY').join('');
|
|
121
|
+
const seq2 = Array(10000).fill('FYWRRY').join('');
|
|
122
|
+
_testDistance(seq1, seq2, df, 0.667);
|
|
123
|
+
} else { _testDistance(prot5, prot6, df, 1.143); }
|
|
120
124
|
});
|
|
121
125
|
});
|
|
122
126
|
|
|
@@ -128,11 +132,15 @@ async function _initMacromoleculeColumn(csv: string): Promise<UnitsHandler> {
|
|
|
128
132
|
if (semType)
|
|
129
133
|
seqCol.semType = semType;
|
|
130
134
|
await grok.data.detectSemanticTypes(srcDf);
|
|
131
|
-
const uh =
|
|
135
|
+
const uh = UnitsHandler.getOrCreate(seqCol);
|
|
132
136
|
return uh;
|
|
133
137
|
}
|
|
134
138
|
|
|
135
139
|
function _testDistance(seq1: string, seq2: string, df: (a: string, b: string) => number, expected: number) {
|
|
136
140
|
const d = df(seq1, seq2);
|
|
137
|
-
expect(d, expected);
|
|
141
|
+
expect(Number(d.toFixed(3)), Number(expected.toFixed(3)));
|
|
142
|
+
}
|
|
143
|
+
|
|
144
|
+
export function mapToFixed(ar: Float32Array | number[]) {
|
|
145
|
+
return Array.from(ar).map((d) => Number(d.toFixed(3)));
|
|
138
146
|
}
|
|
@@ -29,6 +29,6 @@ category('monomerLibraries', () => {
|
|
|
29
29
|
|
|
30
30
|
// Currently default monomer lib set is of all files at LIB_PATH (at least HELMCoreLibrary.json)
|
|
31
31
|
const currentMonomerLib = monomerLibHelper.getBioLib();
|
|
32
|
-
expect(currentMonomerLib.
|
|
32
|
+
expect(currentMonomerLib.getPolymerTypes().length > 0, true);
|
|
33
33
|
});
|
|
34
34
|
});
|
package/src/tests/msa-tests.ts
CHANGED
|
@@ -6,6 +6,7 @@ import {category, expect, expectArray, test} from '@datagrok-libraries/utils/src
|
|
|
6
6
|
import {ALIGNMENT, ALPHABET, NOTATION, TAGS as bioTAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
7
7
|
import {runKalign} from '../utils/multiple-sequence-alignment';
|
|
8
8
|
import {multipleSequenceAlignmentUI} from '../utils/multiple-sequence-alignment-ui';
|
|
9
|
+
import {awaitContainerStart} from './utils';
|
|
9
10
|
//import * as grok from 'datagrok-api/grok';
|
|
10
11
|
|
|
11
12
|
export const _package = new DG.Package();
|
|
@@ -75,31 +76,33 @@ MWRSWYCKHPMWRSWYCKHPMWRSWYCKHPMWRSWYCKHPMWRSWYCKHPMWRSWYCKHPMWRSWYCKHPMWRSWYCKHP
|
|
|
75
76
|
|
|
76
77
|
test('isCorrect', async () => {
|
|
77
78
|
await _testMsaIsCorrect(fromCsv, toCsv);
|
|
78
|
-
});
|
|
79
|
+
}, {skipReason: 'GROK-13221'});
|
|
79
80
|
|
|
80
81
|
test('isCorrectLong', async () => {
|
|
81
82
|
await _testMsaIsCorrect(longFromCsv, longToCsv);
|
|
82
|
-
});
|
|
83
|
+
}, {skipReason: 'GROK-13221'});
|
|
83
84
|
|
|
84
85
|
test('isCorrectHelm', async () => {
|
|
86
|
+
await awaitContainerStart();
|
|
85
87
|
await _testMSAOnColumn(helmFromCsv, helmToCsv, NOTATION.HELM, NOTATION.SEPARATOR, undefined, 'mafft');
|
|
86
|
-
}, {skipReason: 'GROK-
|
|
88
|
+
}, {skipReason: 'GROK-13221'});
|
|
87
89
|
|
|
88
90
|
test('isCorrectHelmLong', async () => {
|
|
91
|
+
await awaitContainerStart();
|
|
89
92
|
await _testMSAOnColumn(longHelmFromCsv, longHelmToCsv, NOTATION.HELM, NOTATION.SEPARATOR, undefined, 'mafft');
|
|
90
|
-
}, {skipReason: 'GROK-
|
|
93
|
+
}, {skipReason: 'GROK-13221'});
|
|
91
94
|
|
|
92
95
|
test('isCorrectSeparator', async () => {
|
|
93
96
|
await _testMSAOnColumn(
|
|
94
|
-
SeparatorFromCsv, SeparatorToCsv, NOTATION.SEPARATOR, NOTATION.FASTA, ALPHABET.PT
|
|
97
|
+
SeparatorFromCsv, SeparatorToCsv, NOTATION.SEPARATOR, NOTATION.FASTA, ALPHABET.PT,
|
|
95
98
|
);
|
|
96
|
-
});
|
|
99
|
+
}, {skipReason: 'GROK-13221'});
|
|
97
100
|
|
|
98
101
|
test('isCorrectSeparatorLong', async () => {
|
|
99
102
|
await _testMSAOnColumn(
|
|
100
|
-
SeparatorLongFromCsv, SeparatorLongToCsv, NOTATION.SEPARATOR, NOTATION.FASTA, ALPHABET.PT
|
|
103
|
+
SeparatorLongFromCsv, SeparatorLongToCsv, NOTATION.SEPARATOR, NOTATION.FASTA, ALPHABET.PT,
|
|
101
104
|
);
|
|
102
|
-
});
|
|
105
|
+
}, {skipReason: 'GROK-13221'});
|
|
103
106
|
});
|
|
104
107
|
|
|
105
108
|
async function _testMsaIsCorrect(srcCsv: string, tgtCsv: string): Promise<void> {
|
|
@@ -119,7 +122,7 @@ async function _testMsaIsCorrect(srcCsv: string, tgtCsv: string): Promise<void>
|
|
|
119
122
|
|
|
120
123
|
async function _testMSAOnColumn(
|
|
121
124
|
srcCsv: string, tgtCsv: string,
|
|
122
|
-
srcNotation: NOTATION, tgtNotation: NOTATION, alphabet?: ALPHABET, pepseaMethod?: string
|
|
125
|
+
srcNotation: NOTATION, tgtNotation: NOTATION, alphabet?: ALPHABET, pepseaMethod?: string,
|
|
123
126
|
): Promise<void> {
|
|
124
127
|
const srcDf: DG.DataFrame = DG.DataFrame.fromCsv(srcCsv);
|
|
125
128
|
const tgtDf: DG.DataFrame = DG.DataFrame.fromCsv(tgtCsv);
|
|
@@ -2,6 +2,7 @@ import * as DG from 'datagrok-api/dg';
|
|
|
2
2
|
|
|
3
3
|
import {category, expect, test} from '@datagrok-libraries/utils/src/test';
|
|
4
4
|
import {runPepsea} from '../utils/pepsea';
|
|
5
|
+
import {awaitContainerStart} from './utils';
|
|
5
6
|
|
|
6
7
|
category('PepSeA', () => {
|
|
7
8
|
const testCsv = `HELM,MSA
|
|
@@ -12,10 +13,12 @@ category('PepSeA', () => {
|
|
|
12
13
|
"PEPTIDE1{F.V.R.G.Y.[MeF].Y.W.S.N.C}$$$$","F.V.R.G.Y.MeF.Y.W.S..N.C"`;
|
|
13
14
|
|
|
14
15
|
test('Basic alignment', async () => {
|
|
16
|
+
await awaitContainerStart();
|
|
15
17
|
const table = DG.DataFrame.fromCsv(testCsv);
|
|
16
18
|
const alignedCol = await runPepsea(table.getCol('HELM'), 'msa(HELM)');
|
|
19
|
+
expect(alignedCol !== null, true, 'PepSeA conainter has not started');
|
|
17
20
|
const alignedTestCol = table.getCol('MSA');
|
|
18
|
-
for (let i = 0; i < alignedCol
|
|
19
|
-
expect(alignedCol
|
|
20
|
-
}, {skipReason: 'GROK-
|
|
21
|
+
for (let i = 0; i < alignedCol!.length; ++i)
|
|
22
|
+
expect(alignedCol!.get(i) == alignedTestCol.get(i), true);
|
|
23
|
+
}, {skipReason: 'GROK-13221'});
|
|
21
24
|
});
|