@datagrok/bio 2.11.42 → 2.12.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +14 -0
- package/README.md +1 -1
- package/detectors.js +11 -11
- package/dist/36.js +1 -1
- package/dist/36.js.map +1 -1
- package/dist/413.js +1 -1
- package/dist/413.js.map +1 -1
- package/dist/590.js +1 -1
- package/dist/590.js.map +1 -1
- package/dist/709.js +1 -1
- package/dist/709.js.map +1 -1
- package/dist/895.js +1 -1
- package/dist/895.js.map +1 -1
- package/dist/package-test.js +3 -3
- package/dist/package-test.js.map +1 -1
- package/dist/package.js +2 -2
- package/dist/package.js.map +1 -1
- package/files/tests/libraries/HELMmonomerSchema.json +1 -1
- package/package.json +11 -11
- package/src/analysis/sequence-activity-cliffs.ts +9 -9
- package/src/analysis/sequence-diversity-viewer.ts +3 -3
- package/src/analysis/sequence-search-base-viewer.ts +2 -2
- package/src/analysis/sequence-similarity-viewer.ts +10 -10
- package/src/analysis/sequence-space.ts +26 -23
- package/src/calculations/monomerLevelMols.ts +13 -11
- package/src/package.ts +12 -15
- package/src/tests/WebLogo-layout-tests.ts +5 -2
- package/src/tests/WebLogo-positions-test.ts +5 -5
- package/src/tests/bio-tests.ts +13 -6
- package/src/tests/converters-test.ts +4 -4
- package/src/tests/detectors-benchmark-tests.ts +5 -5
- package/src/tests/detectors-tests.ts +13 -13
- package/src/tests/fasta-export-tests.ts +10 -4
- package/src/tests/mm-distance-tests.ts +10 -10
- package/src/tests/msa-tests.ts +8 -15
- package/src/tests/renderers-monomer-placer.ts +3 -3
- package/src/tests/renderers-test.ts +6 -8
- package/src/tests/splitters-test.ts +14 -13
- package/src/tests/substructure-filters-tests.ts +143 -1
- package/src/tests/to-atomic-level-tests.ts +2 -2
- package/src/tests/units-handler-get-region.ts +4 -4
- package/src/tests/units-handler-splitted-tests.ts +19 -17
- package/src/tests/units-handler-tests.ts +32 -32
- package/src/utils/cell-renderer.ts +40 -34
- package/src/utils/check-input-column.ts +5 -5
- package/src/utils/context-menu.ts +9 -6
- package/src/utils/convert.ts +9 -9
- package/src/utils/get-region-func-editor.ts +11 -11
- package/src/utils/get-region.ts +10 -12
- package/src/utils/macromolecule-column-widget.ts +4 -3
- package/src/utils/monomer-lib/library-file-manager/event-manager.ts +1 -1
- package/src/utils/multiple-sequence-alignment-ui.ts +6 -6
- package/src/utils/pepsea.ts +1 -0
- package/src/utils/poly-tool/transformation.ts +3 -3
- package/src/utils/poly-tool/ui.ts +46 -135
- package/src/utils/save-as-fasta.ts +14 -15
- package/src/utils/sequence-to-mol.ts +4 -4
- package/src/viewers/web-logo-viewer.ts +46 -54
- package/src/widgets/bio-substructure-filter-types.ts +19 -45
- package/src/widgets/bio-substructure-filter.ts +45 -23
- package/src/widgets/composition-analysis-widget.ts +8 -8
|
@@ -6,7 +6,7 @@ import {category, test, expect} from '@datagrok-libraries/utils/src/test';
|
|
|
6
6
|
|
|
7
7
|
import {importFasta} from '../package';
|
|
8
8
|
import {ALIGNMENT, ALPHABET, NOTATION, TAGS as bioTAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
9
|
-
import {
|
|
9
|
+
import {SeqHandler} from '@datagrok-libraries/bio/src/utils/seq-handler';
|
|
10
10
|
|
|
11
11
|
/*
|
|
12
12
|
// snippet to list df columns of semType='Macromolecule' (false positive)
|
|
@@ -433,12 +433,12 @@ export async function _testPosList(list: string[], units: NOTATION,
|
|
|
433
433
|
if (separator)
|
|
434
434
|
expect(col.getTag(bioTAGS.separator), separator);
|
|
435
435
|
|
|
436
|
-
const
|
|
437
|
-
expect(
|
|
438
|
-
expect(
|
|
439
|
-
if (!
|
|
440
|
-
expect(
|
|
441
|
-
expect(
|
|
436
|
+
const sh = SeqHandler.forColumn(col);
|
|
437
|
+
expect(sh.getAlphabetSize(), alphabetSize);
|
|
438
|
+
expect(sh.getAlphabetIsMultichar(), alphabetIsMultichar);
|
|
439
|
+
if (!sh.isHelm()) {
|
|
440
|
+
expect(sh.aligned, aligned);
|
|
441
|
+
expect(sh.alphabet, alphabet);
|
|
442
442
|
}
|
|
443
443
|
}
|
|
444
444
|
|
|
@@ -461,12 +461,12 @@ export async function _testPos(
|
|
|
461
461
|
if (separator)
|
|
462
462
|
expect(col.getTag(bioTAGS.separator), separator);
|
|
463
463
|
|
|
464
|
-
const
|
|
465
|
-
expect(
|
|
466
|
-
expect(
|
|
467
|
-
if (!
|
|
468
|
-
expect(
|
|
469
|
-
expect(
|
|
464
|
+
const sh = SeqHandler.forColumn(col);
|
|
465
|
+
expect(sh.getAlphabetSize(), alphabetSize);
|
|
466
|
+
expect(sh.getAlphabetIsMultichar(), alphabetIsMultichar);
|
|
467
|
+
if (!sh.isHelm()) {
|
|
468
|
+
expect(sh.aligned, aligned);
|
|
469
|
+
expect(sh.alphabet, alphabet);
|
|
470
470
|
}
|
|
471
471
|
}
|
|
472
472
|
|
|
@@ -4,7 +4,8 @@ import * as grok from 'datagrok-api/grok';
|
|
|
4
4
|
|
|
5
5
|
import {category, expect, expectArray, test} from '@datagrok-libraries/utils/src/test';
|
|
6
6
|
import {saveAsFastaDo, wrapSequence} from '../utils/save-as-fasta';
|
|
7
|
-
import {splitterAsFasta} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
7
|
+
import {NOTATION, splitterAsFasta} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
8
|
+
import {SeqHandler} from '@datagrok-libraries/bio/src/utils/seq-handler';
|
|
8
9
|
|
|
9
10
|
type SaveAsFastaTestArgs = { srcCsv: string, idCols: string [], seqCol: string, lineWidth: number, tgtFasta: string };
|
|
10
11
|
|
|
@@ -87,10 +88,13 @@ MRGGL
|
|
|
87
88
|
});
|
|
88
89
|
|
|
89
90
|
function _testWrapSequence(testKey: string, lineWidth: number = 10) {
|
|
90
|
-
const splitter = splitterAsFasta;
|
|
91
|
-
|
|
92
91
|
const srcSeq: string = wrapData[testKey].src;
|
|
93
|
-
const
|
|
92
|
+
const col = DG.Column.fromStrings('src', [srcSeq]);
|
|
93
|
+
col.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
94
|
+
col.setTag(DG.TAGS.UNITS, NOTATION.FASTA);
|
|
95
|
+
const sh = SeqHandler.forColumn(col);
|
|
96
|
+
const srcSS = sh.getSplitted(0);
|
|
97
|
+
const wrapRes: string[] = wrapSequence(srcSS, lineWidth);
|
|
94
98
|
const wrapTgt: string[] = wrapData[testKey].tgt;
|
|
95
99
|
|
|
96
100
|
expectArray(wrapRes, wrapTgt);
|
|
@@ -100,6 +104,8 @@ MRGGL
|
|
|
100
104
|
const df: DG.DataFrame = DG.DataFrame.fromCsv(args.srcCsv);
|
|
101
105
|
|
|
102
106
|
const seqCol: DG.Column = df.getCol(args.seqCol);
|
|
107
|
+
seqCol.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
108
|
+
seqCol.setTag(DG.TAGS.UNITS, NOTATION.FASTA);
|
|
103
109
|
const idCols: DG.Column[] = args.idCols.map((colName) => df.getCol(colName));
|
|
104
110
|
|
|
105
111
|
const fastaRes: string = saveAsFastaDo(idCols, seqCol, args.lineWidth);
|
|
@@ -3,7 +3,7 @@ import * as ui from 'datagrok-api/ui';
|
|
|
3
3
|
import * as DG from 'datagrok-api/dg';
|
|
4
4
|
|
|
5
5
|
import {category, expect, test} from '@datagrok-libraries/utils/src/test';
|
|
6
|
-
import {
|
|
6
|
+
import {SeqHandler} from '@datagrok-libraries/bio/src/utils/seq-handler';
|
|
7
7
|
import {MmDistanceFunctionsNames, mmDistanceFunctions}
|
|
8
8
|
from '@datagrok-libraries/ml/src/macromolecule-distance-functions';
|
|
9
9
|
|
|
@@ -42,20 +42,20 @@ ATCGAATCGA
|
|
|
42
42
|
ATCGAATCGA`;
|
|
43
43
|
|
|
44
44
|
test('protein-distance-function', async () => {
|
|
45
|
-
const
|
|
46
|
-
const distFunc =
|
|
45
|
+
const sh = await _initMacromoleculeColumn(protTable);
|
|
46
|
+
const distFunc = sh.getDistanceFunctionName();
|
|
47
47
|
expect(distFunc, MmDistanceFunctionsNames.LEVENSHTEIN);
|
|
48
48
|
});
|
|
49
49
|
|
|
50
50
|
test('DNA-distance-function', async () => {
|
|
51
|
-
const
|
|
52
|
-
const distFunc =
|
|
51
|
+
const sh = await _initMacromoleculeColumn(DNATable);
|
|
52
|
+
const distFunc = sh.getDistanceFunctionName();
|
|
53
53
|
expect(distFunc, MmDistanceFunctionsNames.LEVENSHTEIN);
|
|
54
54
|
});
|
|
55
55
|
|
|
56
56
|
test('MSA-distance-function', async () => {
|
|
57
|
-
const
|
|
58
|
-
const distFunc =
|
|
57
|
+
const sh = await _initMacromoleculeColumn(MSATable);
|
|
58
|
+
const distFunc = sh.getDistanceFunctionName();
|
|
59
59
|
expect(distFunc, MmDistanceFunctionsNames.HAMMING);
|
|
60
60
|
});
|
|
61
61
|
|
|
@@ -125,7 +125,7 @@ ATCGAATCGA`;
|
|
|
125
125
|
});
|
|
126
126
|
});
|
|
127
127
|
|
|
128
|
-
async function _initMacromoleculeColumn(csv: string): Promise<
|
|
128
|
+
async function _initMacromoleculeColumn(csv: string): Promise<SeqHandler> {
|
|
129
129
|
const srcDf: DG.DataFrame = DG.DataFrame.fromCsv(csv);
|
|
130
130
|
const seqCol = srcDf.col('seq')!;
|
|
131
131
|
const semType: string = await grok.functions
|
|
@@ -133,8 +133,8 @@ async function _initMacromoleculeColumn(csv: string): Promise<UnitsHandler> {
|
|
|
133
133
|
if (semType)
|
|
134
134
|
seqCol.semType = semType;
|
|
135
135
|
await grok.data.detectSemanticTypes(srcDf);
|
|
136
|
-
const
|
|
137
|
-
return
|
|
136
|
+
const sh = SeqHandler.forColumn(seqCol);
|
|
137
|
+
return sh;
|
|
138
138
|
}
|
|
139
139
|
|
|
140
140
|
function _testDistance(seq1: string, seq2: string, df: (a: string, b: string) => number, expected: number) {
|
package/src/tests/msa-tests.ts
CHANGED
|
@@ -103,13 +103,11 @@ MWRSWYCKHPMWRSWYCKHPMWRSWYCKHPMWRSWYCKHPMWRSWYCKHPMWRSWYCKHPMWRSWYCKHPMWRSWYCKHP
|
|
|
103
103
|
|
|
104
104
|
async function _testMsaIsCorrect(srcCsv: string, tgtCsv: string): Promise<void> {
|
|
105
105
|
const srcDf: DG.DataFrame = DG.DataFrame.fromCsv(srcCsv);
|
|
106
|
+
await grok.data.detectSemanticTypes(srcDf);
|
|
106
107
|
const tgtDf: DG.DataFrame = DG.DataFrame.fromCsv(tgtCsv);
|
|
107
108
|
|
|
108
109
|
const srcCol: DG.Column = srcDf.getCol('seq')!;
|
|
109
|
-
|
|
110
|
-
.call('Bio:detectMacromolecule', {col: srcCol}) as unknown as string;
|
|
111
|
-
if (semType)
|
|
112
|
-
srcCol.semType = semType;
|
|
110
|
+
expect(srcCol.semType, DG.SEMTYPE.MACROMOLECULE);
|
|
113
111
|
|
|
114
112
|
const tgtCol: DG.Column = tgtDf.getCol('seq')!;
|
|
115
113
|
const resCol: DG.Column = await runKalign(srcCol, true);
|
|
@@ -121,23 +119,18 @@ async function _testMSAOnColumn(
|
|
|
121
119
|
srcNotation: NOTATION, tgtNotation: NOTATION, alphabet?: ALPHABET, pepseaMethod?: string,
|
|
122
120
|
): Promise<void> {
|
|
123
121
|
const srcDf: DG.DataFrame = DG.DataFrame.fromCsv(srcCsv);
|
|
122
|
+
await grok.data.detectSemanticTypes(srcDf);
|
|
124
123
|
const tgtDf: DG.DataFrame = DG.DataFrame.fromCsv(tgtCsv);
|
|
125
124
|
|
|
126
|
-
const srcSeqCol = srcDf.getCol('seq')!;
|
|
127
125
|
const tgtCol = tgtDf.getCol('seq')!;
|
|
128
126
|
const srcCol: DG.Column = srcDf.getCol('seq')!;
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
if (semType)
|
|
132
|
-
srcCol.semType = semType;
|
|
133
|
-
|
|
134
|
-
await grok.data.detectSemanticTypes(srcDf);
|
|
135
|
-
expect(srcSeqCol.semType, DG.SEMTYPE.MACROMOLECULE);
|
|
136
|
-
expect(srcSeqCol.getTag(DG.TAGS.UNITS), srcNotation);
|
|
127
|
+
expect(srcCol.semType, DG.SEMTYPE.MACROMOLECULE);
|
|
128
|
+
expect(srcCol.getTag(DG.TAGS.UNITS), srcNotation);
|
|
137
129
|
if (alphabet)
|
|
138
|
-
expect(
|
|
130
|
+
expect(srcCol.getTag(bioTAGS.alphabet), alphabet);
|
|
139
131
|
|
|
140
|
-
const msaSeqCol = await multipleSequenceAlignmentUI({col:
|
|
132
|
+
const msaSeqCol = await multipleSequenceAlignmentUI({col: srcCol, pepsea: {method: pepseaMethod}});
|
|
133
|
+
expect(msaSeqCol.semType, DG.SEMTYPE.MACROMOLECULE);
|
|
141
134
|
expect(msaSeqCol.semType, DG.SEMTYPE.MACROMOLECULE);
|
|
142
135
|
expect(msaSeqCol.getTag(DG.TAGS.UNITS), tgtNotation);
|
|
143
136
|
expect(msaSeqCol.getTag(bioTAGS.aligned), ALIGNMENT.SEQ_MSA);
|
|
@@ -6,7 +6,7 @@ import wu from 'wu';
|
|
|
6
6
|
import {category, test} from '@datagrok-libraries/utils/src/test';
|
|
7
7
|
import {MonomerPlacer} from '@datagrok-libraries/bio/src/utils/cell-renderer-monomer-placer';
|
|
8
8
|
import {monomerToShort} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
9
|
-
import {
|
|
9
|
+
import {SeqHandler} from '@datagrok-libraries/bio/src/utils/seq-handler';
|
|
10
10
|
|
|
11
11
|
import {_package} from '../package-test';
|
|
12
12
|
|
|
@@ -93,9 +93,9 @@ id3,QHIRE--LT
|
|
|
93
93
|
const charWidth: number = 7;
|
|
94
94
|
const sepWidth: number = 12;
|
|
95
95
|
const colTemp: MonomerPlacer = new MonomerPlacer(null, seqCol, () => {
|
|
96
|
-
const
|
|
96
|
+
const sh = SeqHandler.forColumn(seqCol);
|
|
97
97
|
return {
|
|
98
|
-
|
|
98
|
+
seqHandler: sh,
|
|
99
99
|
monomerCharWidth: charWidth,
|
|
100
100
|
separatorWidth: sepWidth,
|
|
101
101
|
monomerToShort: monomerToShort,
|
|
@@ -1,11 +1,9 @@
|
|
|
1
1
|
import * as grok from 'datagrok-api/grok';
|
|
2
2
|
import * as DG from 'datagrok-api/dg';
|
|
3
3
|
|
|
4
|
-
import
|
|
5
|
-
|
|
6
|
-
import {category, expect, test, awaitCheck, delay} from '@datagrok-libraries/utils/src/test';
|
|
4
|
+
import {category, expect, test, delay} from '@datagrok-libraries/utils/src/test';
|
|
7
5
|
import {ALIGNMENT, ALPHABET, NOTATION, TAGS as bioTAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
8
|
-
import {
|
|
6
|
+
import {SeqHandler} from '@datagrok-libraries/bio/src/utils/seq-handler';
|
|
9
7
|
|
|
10
8
|
import {importFasta} from '../package';
|
|
11
9
|
import {convertDo} from '../utils/convert';
|
|
@@ -148,8 +146,8 @@ category('renderers', () => {
|
|
|
148
146
|
expect(msaSeqCol.getTag(bioTAGS.alphabet), ALPHABET.PT);
|
|
149
147
|
expect(msaSeqCol.getTag(DG.TAGS.CELL_RENDERER), 'sequence');
|
|
150
148
|
|
|
151
|
-
// check newColumn with
|
|
152
|
-
const
|
|
149
|
+
// check newColumn with SeqHandler constructor
|
|
150
|
+
const _sh: SeqHandler = SeqHandler.forColumn(msaSeqCol);
|
|
153
151
|
}
|
|
154
152
|
|
|
155
153
|
async function _testAfterConvert() {
|
|
@@ -172,8 +170,8 @@ category('renderers', () => {
|
|
|
172
170
|
const resCellRenderer = tgtCol.getTag(DG.TAGS.CELL_RENDERER);
|
|
173
171
|
expect(resCellRenderer, 'sequence');
|
|
174
172
|
|
|
175
|
-
// check tgtCol with
|
|
176
|
-
const
|
|
173
|
+
// check tgtCol with SeqHandler constructor
|
|
174
|
+
const _sh: SeqHandler = SeqHandler.forColumn(tgtCol);
|
|
177
175
|
}
|
|
178
176
|
|
|
179
177
|
async function _selectRendererBySemType() {
|
|
@@ -2,22 +2,23 @@ import * as grok from 'datagrok-api/grok';
|
|
|
2
2
|
import * as ui from 'datagrok-api/ui';
|
|
3
3
|
import * as DG from 'datagrok-api/dg';
|
|
4
4
|
|
|
5
|
+
import wu from 'wu';
|
|
6
|
+
|
|
5
7
|
import {
|
|
6
|
-
after, before, category, test, expect, expectArray
|
|
8
|
+
after, before, category, test, expect, expectArray
|
|
7
9
|
} from '@datagrok-libraries/utils/src/test';
|
|
8
|
-
import {
|
|
9
|
-
TAGS as bioTAGS,
|
|
10
|
-
splitterAsFasta,
|
|
11
|
-
splitterAsHelm,
|
|
12
|
-
NOTATION
|
|
13
|
-
} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
10
|
+
import {TAGS as bioTAGS, splitterAsFasta} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
14
11
|
|
|
15
12
|
import {splitToMonomersUI} from '../utils/split-to-monomers';
|
|
16
13
|
import {awaitGrid} from './utils';
|
|
17
14
|
import * as C from '../utils/constants';
|
|
18
|
-
|
|
19
15
|
import {getHelmMonomers} from '../package';
|
|
20
16
|
|
|
17
|
+
import {splitterAsHelm} from '@datagrok-libraries/bio/src/utils/macromolecule/utils';
|
|
18
|
+
import {ISeqSplitted} from '@datagrok-libraries/bio/src/utils/macromolecule/types';
|
|
19
|
+
|
|
20
|
+
import {_package} from '../package-test';
|
|
21
|
+
|
|
21
22
|
category('splitters', async () => {
|
|
22
23
|
before(async () => {
|
|
23
24
|
});
|
|
@@ -132,7 +133,7 @@ PEPTIDE1{hHis.Aca.Cys_SEt}$$$,5.72388
|
|
|
132
133
|
});
|
|
133
134
|
|
|
134
135
|
// test('helmAsFasta', async () => {
|
|
135
|
-
// // The columns can't be empty for
|
|
136
|
+
// // The columns can't be empty for SeqHandler
|
|
136
137
|
// /* eslint-disable max-len */
|
|
137
138
|
// const srcSeq = '[meI][Pip][dK][Thr_PO3H2][L-hArg(Et,Et)][D-Tyr_Et][Tyr_ab-dehydroMe][dV]EN[D-Orn][D-aThr][Phe_4Me]';
|
|
138
139
|
// const tgtSeqA = ['meI', 'Pip', 'dK', 'Thr_PO3H2', 'L-hArg(Et,Et)', 'D-Tyr_Et', 'Tyr_ab-dehydroMe', 'dV', 'E', 'N', 'D-Orn', 'D-aThr', 'Phe_4Me'];
|
|
@@ -143,13 +144,13 @@ PEPTIDE1{hHis.Aca.Cys_SEt}$$$,5.72388
|
|
|
143
144
|
});
|
|
144
145
|
|
|
145
146
|
export async function _testFastaSplitter(src: string, tgt: string[]) {
|
|
146
|
-
const res:
|
|
147
|
+
const res: ISeqSplitted = splitterAsFasta(src);
|
|
147
148
|
console.debug(`Bio: tests: splitters: src=${JSON.stringify(src)}, res=${JSON.stringify(res)} .`);
|
|
148
|
-
expectArray(res, tgt);
|
|
149
|
+
expectArray(wu(res.originals).toArray(), tgt);
|
|
149
150
|
}
|
|
150
151
|
|
|
151
152
|
export async function _testHelmSplitter(src: string, tgt: string[]) {
|
|
152
|
-
const res:
|
|
153
|
+
const res: ISeqSplitted = splitterAsHelm(src);
|
|
153
154
|
console.debug(`Bio: tests: splitters: src=${JSON.stringify(src)}, res=${JSON.stringify(res)} .`);
|
|
154
|
-
expectArray(res, tgt);
|
|
155
|
+
expectArray(wu(res.originals).toArray(), tgt);
|
|
155
156
|
}
|
|
@@ -2,6 +2,10 @@ import * as grok from 'datagrok-api/grok';
|
|
|
2
2
|
import * as ui from 'datagrok-api/ui';
|
|
3
3
|
import * as DG from 'datagrok-api/dg';
|
|
4
4
|
|
|
5
|
+
import $ from 'cash-dom';
|
|
6
|
+
import wu from 'wu';
|
|
7
|
+
|
|
8
|
+
|
|
5
9
|
import {after, before, category, test, expect, delay, testEvent, awaitCheck} from '@datagrok-libraries/utils/src/test';
|
|
6
10
|
import {getMonomerLibHelper, IMonomerLibHelper} from '@datagrok-libraries/bio/src/monomer-works/monomer-utils';
|
|
7
11
|
import {
|
|
@@ -19,7 +23,7 @@ import {HelmBioFilter} from '../widgets/bio-substructure-filter-helm';
|
|
|
19
23
|
import {_package} from '../package-test';
|
|
20
24
|
|
|
21
25
|
|
|
22
|
-
category('
|
|
26
|
+
category('bio-substructure-filters', async () => {
|
|
23
27
|
let monomerLibHelper: IMonomerLibHelper;
|
|
24
28
|
/** Backup actual user's monomer libraries settings */
|
|
25
29
|
let userLibSettings: UserLibSettings;
|
|
@@ -305,6 +309,144 @@ category('substructureFilters', async () => {
|
|
|
305
309
|
await Promise.all([f1.awaitRendered(), f2.awaitRendered()]);
|
|
306
310
|
await awaitGrid(view.grid);
|
|
307
311
|
});
|
|
312
|
+
|
|
313
|
+
// two seq columns
|
|
314
|
+
|
|
315
|
+
const twoColumnsCsv: string = `id,seq1,seq2,trueSeq1,trueSeq2
|
|
316
|
+
0,CGGCTACGGC,ATTGCATTCG,0,1,
|
|
317
|
+
1,CGGCTGCCGC,ATAGCATTCG,1,1,
|
|
318
|
+
2,CGGCTGCGCC,AATGCATACG,1,0,
|
|
319
|
+
3,CGGCTGCATT,TTTGCATTCG,1,1,
|
|
320
|
+
4,CGGCTGCATT,AAAGCATACG,1,0,
|
|
321
|
+
`;
|
|
322
|
+
|
|
323
|
+
test('two-columns-fasta', async () => {
|
|
324
|
+
const df = DG.DataFrame.fromCsv(twoColumnsCsv);
|
|
325
|
+
await grok.data.detectSemanticTypes(df);
|
|
326
|
+
const view = grok.shell.addTableView(df);
|
|
327
|
+
|
|
328
|
+
const fSeq1ColName: string = 'seq1';
|
|
329
|
+
const fSeq1SubStr: string = 'CGGCTG';
|
|
330
|
+
const fSeq1Trues: number[] = df.getCol('trueSeq1').toList();
|
|
331
|
+
|
|
332
|
+
const fSeq2ColName: string = 'seq2';
|
|
333
|
+
const fSeq2SubStr: string = 'GCATT';
|
|
334
|
+
const fSeq2Trues: number[] = df.getCol('trueSeq2').toList();
|
|
335
|
+
|
|
336
|
+
//const seq2Filter = new BioSubstructureFilter();
|
|
337
|
+
const filterList: any[] = [
|
|
338
|
+
{type: 'Bio:bioSubstructureFilter', columnName: fSeq1ColName},
|
|
339
|
+
{type: 'Bio:bioSubstructureFilter', columnName: fSeq2ColName},
|
|
340
|
+
];
|
|
341
|
+
const fg = (await df.plot.fromType(DG.VIEWER.FILTERS,
|
|
342
|
+
{filters: filterList})) as DG.FilterGroup;
|
|
343
|
+
view.dockManager.dock(fg, DG.DOCK_TYPE.LEFT);
|
|
344
|
+
await delay(100);
|
|
345
|
+
await awaitGrid(view.grid);
|
|
346
|
+
|
|
347
|
+
const seq1Filter = fg.filters[0] as BioSubstructureFilter;
|
|
348
|
+
const seq2Filter = fg.filters[1] as BioSubstructureFilter;
|
|
349
|
+
expect(seq1Filter.column!.name, fSeq1ColName);
|
|
350
|
+
expect(seq2Filter.column!.name, fSeq2ColName);
|
|
351
|
+
|
|
352
|
+
const seq1Bf = seq1Filter.bioFilter as FastaBioFilter;
|
|
353
|
+
const seq2Bf = seq2Filter.bioFilter as FastaBioFilter;
|
|
354
|
+
|
|
355
|
+
await testEvent(df.onRowsFiltered, () => {}, () => {
|
|
356
|
+
seq1Bf.props = new BioFilterProps(fSeq1SubStr);
|
|
357
|
+
}, 1000);
|
|
358
|
+
await testEvent(df.onRowsFiltered, () => {}, () => {
|
|
359
|
+
seq2Bf.props = new BioFilterProps('');
|
|
360
|
+
}, 1000, 'testEvent onRowsFiltered on seq1');
|
|
361
|
+
expect(df.filter.trueCount, fSeq1Trues.filter((v) => v === 1).length);
|
|
362
|
+
expect(df.filter.toBinaryString(), fSeq1Trues.map((v) => v.toString()).join(''));
|
|
363
|
+
|
|
364
|
+
await testEvent(df.onRowsFiltered, () => {}, () => {
|
|
365
|
+
seq1Bf.props = new BioFilterProps('');
|
|
366
|
+
}, 1000);
|
|
367
|
+
await testEvent(df.onRowsFiltered, () => {}, () => {
|
|
368
|
+
seq2Bf.props = new BioFilterProps(fSeq2SubStr);
|
|
369
|
+
}, 1000, 'testEvent onRowsFiltered on seq2');
|
|
370
|
+
expect(df.filter.trueCount, fSeq2Trues.filter((v) => v === 1).length);
|
|
371
|
+
expect(df.filter.toBinaryString(), fSeq2Trues.map((v) => v.toString()).join(''));
|
|
372
|
+
|
|
373
|
+
await testEvent(df.onRowsFiltered, () => {}, () => {
|
|
374
|
+
seq1Bf.props = new BioFilterProps('');
|
|
375
|
+
}, 1000);
|
|
376
|
+
await testEvent(df.onRowsFiltered, () => {}, () => {
|
|
377
|
+
seq2Bf.props = new BioFilterProps('');
|
|
378
|
+
}, 1000, 'testEvent onRowsFiltered on neither');
|
|
379
|
+
expect(df.filter.trueCount, df.rowCount);
|
|
380
|
+
|
|
381
|
+
await testEvent(df.onRowsFiltered, () => {}, () => {
|
|
382
|
+
seq1Bf.props = new BioFilterProps(fSeq1SubStr);
|
|
383
|
+
}, 5000);
|
|
384
|
+
await testEvent(df.onRowsFiltered, () => {}, () => {
|
|
385
|
+
seq2Bf.props = new BioFilterProps(fSeq2SubStr);
|
|
386
|
+
}, 5000, 'testEvent onRowsFiltered on both');
|
|
387
|
+
const bothTrues: number[] = wu.count(0).take(df.rowCount)
|
|
388
|
+
.map((rowI) => fSeq1Trues[rowI] * fSeq2Trues[rowI]).toArray();
|
|
389
|
+
expect(df.filter.trueCount, bothTrues.filter((v) => v === 1).length);
|
|
390
|
+
expect(df.filter.toBinaryString(), bothTrues.map((v) => v.toString()).join(''));
|
|
391
|
+
|
|
392
|
+
await Promise.all([seq1Filter.awaitRendered(), seq2Filter.awaitRendered(), awaitGrid(view.grid)]);
|
|
393
|
+
});
|
|
394
|
+
|
|
395
|
+
// -- reset --
|
|
396
|
+
|
|
397
|
+
test('reset-fasta', async () => {
|
|
398
|
+
const df = await readDataframe('tests/filter_FASTA.csv');
|
|
399
|
+
await grok.data.detectSemanticTypes(df);
|
|
400
|
+
const view = grok.shell.addTableView(df);
|
|
401
|
+
|
|
402
|
+
const fSeqColName: string = 'fasta';
|
|
403
|
+
const fSubStr: string = 'MD';
|
|
404
|
+
const fTrueCount: number = 3;
|
|
405
|
+
|
|
406
|
+
const filterList = [{type: 'Bio:bioSubstructureFilter', columnName: fSeqColName}];
|
|
407
|
+
const fg = (await df.plot.fromType(DG.VIEWER.FILTERS,
|
|
408
|
+
{filters: filterList})) as DG.FilterGroup;
|
|
409
|
+
view.dockManager.dock(fg, DG.DOCK_TYPE.LEFT);
|
|
410
|
+
await delay(100);
|
|
411
|
+
await awaitGrid(view.grid);
|
|
412
|
+
|
|
413
|
+
const seqFilter = fg.filters[0] as BioSubstructureFilter;
|
|
414
|
+
const seqBf = seqFilter.bioFilter as FastaBioFilter;
|
|
415
|
+
await testEvent(df.onRowsFiltered, () => {}, () => {
|
|
416
|
+
seqBf.props = new BioFilterProps(fSubStr);
|
|
417
|
+
}, 1000, 'testEvent onRowsFiltered');
|
|
418
|
+
expect(df.filter.trueCount, fTrueCount);
|
|
419
|
+
expect(seqBf.props.substructure, fSubStr);
|
|
420
|
+
expect(seqBf.substructureInput.value, fSubStr);
|
|
421
|
+
|
|
422
|
+
const fgResetIconEl: HTMLElement = $(fg.root).find('i[name="icon-arrow-rotate-left"]')[0] as HTMLElement;
|
|
423
|
+
fgResetIconEl.click();
|
|
424
|
+
await delay(100);
|
|
425
|
+
await awaitGrid(view.grid);
|
|
426
|
+
expect(seqBf.props.substructure, '');
|
|
427
|
+
expect(seqBf.substructureInput.value, '');
|
|
428
|
+
});
|
|
429
|
+
|
|
430
|
+
test('reopen', async () => {
|
|
431
|
+
const df = await _package.files.readCsv('tests/filter_FASTA.csv');
|
|
432
|
+
const view = grok.shell.addTableView(df);
|
|
433
|
+
|
|
434
|
+
const filterList = [{type: 'Bio:bioSubstructureFilter', columnName: 'fasta'}];
|
|
435
|
+
|
|
436
|
+
const fg1 = (await df.plot.fromType(DG.VIEWER.FILTERS,
|
|
437
|
+
{filters: filterList})) as DG.FilterGroup;
|
|
438
|
+
const fg1Dn = view.dockManager.dock(fg1, DG.DOCK_TYPE.LEFT);
|
|
439
|
+
await delay(100);
|
|
440
|
+
await awaitGrid(view.grid);
|
|
441
|
+
fg1.close();
|
|
442
|
+
await awaitGrid(view.grid);
|
|
443
|
+
|
|
444
|
+
const fg2 = (await df.plot.fromType(DG.VIEWER.FILTERS,
|
|
445
|
+
{filters: filterList})) as DG.FilterGroup;
|
|
446
|
+
const fg2Dn = view.dockManager.dock(fg2, DG.DOCK_TYPE.LEFT);
|
|
447
|
+
await delay(100);
|
|
448
|
+
await awaitGrid(view.grid);
|
|
449
|
+
});
|
|
308
450
|
});
|
|
309
451
|
|
|
310
452
|
async function createFilter(colName: string, df: DG.DataFrame): Promise<BioSubstructureFilter> {
|
|
@@ -14,7 +14,7 @@ import {
|
|
|
14
14
|
getUserLibSettings, setUserLibSettings, setUserLibSettingsForTests
|
|
15
15
|
} from '@datagrok-libraries/bio/src/monomer-works/lib-settings';
|
|
16
16
|
import {UserLibSettings} from '@datagrok-libraries/bio/src/monomer-works/types';
|
|
17
|
-
import {
|
|
17
|
+
import {SeqHandler} from '@datagrok-libraries/bio/src/utils/seq-handler';
|
|
18
18
|
|
|
19
19
|
import {toAtomicLevel} from '../package';
|
|
20
20
|
import {_package} from '../package-test';
|
|
@@ -198,7 +198,7 @@ PEPTIDE1{Lys_Boc.hHis.Aca.Cys_SEt.T.dK.Thr_PO3H2.Aca.Tyr_PO3H2.Thr_PO3H2.Aca.Tyr
|
|
|
198
198
|
seqCol.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
199
199
|
seqCol.setTag(DG.TAGS.UNITS, NOTATION.FASTA);
|
|
200
200
|
seqCol.setTag(bioTAGS.alphabet, ALPHABET.PT);
|
|
201
|
-
const
|
|
201
|
+
const sh = SeqHandler.forColumn(seqCol);
|
|
202
202
|
const resCol = (await _testToAtomicLevel(srcDf, 'seq', monomerLibHelper))!;
|
|
203
203
|
expect(polishMolfile(resCol.get(0)), polishMolfile(tgtMol));
|
|
204
204
|
});
|
|
@@ -2,10 +2,10 @@ import * as grok from 'datagrok-api/grok';
|
|
|
2
2
|
import * as DG from 'datagrok-api/dg';
|
|
3
3
|
|
|
4
4
|
import {category, expect, expectArray, test} from '@datagrok-libraries/utils/src/test';
|
|
5
|
-
import {
|
|
5
|
+
import {SeqHandler} from '@datagrok-libraries/bio/src/utils/seq-handler';
|
|
6
6
|
import {ALPHABET, NOTATION, TAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
7
7
|
|
|
8
|
-
category('
|
|
8
|
+
category('SeqHandler: getRegion', () => {
|
|
9
9
|
const data: {
|
|
10
10
|
[testName: string]: {
|
|
11
11
|
srcCsv: string,
|
|
@@ -76,8 +76,8 @@ PEPTIDE1{[Cys_SEt].T.*.*}$$$$`,
|
|
|
76
76
|
const semType: string | null = await grok.functions.call('Bio:detectMacromolecule', {col: srcSeqCol});
|
|
77
77
|
if (semType) srcSeqCol.semType = semType;
|
|
78
78
|
|
|
79
|
-
const
|
|
80
|
-
const resSeqCol =
|
|
79
|
+
const srcSh = SeqHandler.forColumn(srcSeqCol);
|
|
80
|
+
const resSeqCol = srcSh.getRegion(testData.startIdx, testData.endIdx, 'regSeq');
|
|
81
81
|
|
|
82
82
|
const tgtDf = DG.DataFrame.fromCsv(testData.tgtCsv);
|
|
83
83
|
const tgtSeqCol = tgtDf.getCol('seq');
|
|
@@ -1,10 +1,11 @@
|
|
|
1
1
|
import * as grok from 'datagrok-api/grok';
|
|
2
2
|
import * as DG from 'datagrok-api/dg';
|
|
3
3
|
|
|
4
|
+
import wu from 'wu';
|
|
5
|
+
|
|
4
6
|
import {category, expect, expectArray, test} from '@datagrok-libraries/utils/src/test';
|
|
5
|
-
import {
|
|
7
|
+
import {GapOriginals, SeqHandler} from '@datagrok-libraries/bio/src/utils/seq-handler';
|
|
6
8
|
import {NOTATION} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
7
|
-
import {ISeqSplitted} from '@datagrok-libraries/bio/src/utils/macromolecule/types';
|
|
8
9
|
|
|
9
10
|
enum Tests {
|
|
10
11
|
fasta = 'fasta',
|
|
@@ -14,10 +15,10 @@ enum Tests {
|
|
|
14
15
|
helm = 'helm',
|
|
15
16
|
}
|
|
16
17
|
|
|
17
|
-
category('
|
|
18
|
-
const fG =
|
|
19
|
-
const hG =
|
|
20
|
-
const sG =
|
|
18
|
+
category('SeqHandler', () => {
|
|
19
|
+
const fG = GapOriginals[NOTATION.FASTA];
|
|
20
|
+
const hG = GapOriginals[NOTATION.HELM];
|
|
21
|
+
const sG = GapOriginals[NOTATION.SEPARATOR];
|
|
21
22
|
const data: {
|
|
22
23
|
[testName: string]: {
|
|
23
24
|
src: { csv: string },
|
|
@@ -34,9 +35,9 @@ TTCAACTTCAAC`
|
|
|
34
35
|
tgt: {
|
|
35
36
|
notation: NOTATION.FASTA,
|
|
36
37
|
splitted: [
|
|
37
|
-
'
|
|
38
|
-
'
|
|
39
|
-
'
|
|
38
|
+
['A', 'C', 'G', 'T', 'C', 'A', 'C', 'G', 'T', 'C'],
|
|
39
|
+
['C', 'A', 'G', 'T', 'G', 'T', 'C', 'A', 'G', 'T', 'G', 'T'],
|
|
40
|
+
['T', 'T', 'C', 'A', 'A', 'C', 'T', 'T', 'C', 'A', 'A', 'C'],
|
|
40
41
|
]
|
|
41
42
|
}
|
|
42
43
|
},
|
|
@@ -51,9 +52,9 @@ ACCGTACTACCGTACT`,
|
|
|
51
52
|
notation: NOTATION.FASTA,
|
|
52
53
|
splitted: [
|
|
53
54
|
//@formatter:off
|
|
54
|
-
'
|
|
55
|
-
'
|
|
56
|
-
'
|
|
55
|
+
['A', 'C', '-', 'G', 'T', '-', 'C', 'T', 'A', 'C', '-', 'G', 'T', '-', 'C', 'T'],
|
|
56
|
+
['C', 'A', 'C', '-', 'T', '-', 'G', 'T', 'C', 'A', 'C', '-', 'T', '-', 'G', 'T'],
|
|
57
|
+
['A', 'C', 'C', 'G', 'T', 'A', 'C', 'T', 'A', 'C', 'C', 'G', 'T', 'A', 'C', 'T'],
|
|
57
58
|
//@formatter:on
|
|
58
59
|
]
|
|
59
60
|
}
|
|
@@ -129,12 +130,13 @@ PEPTIDE1{meI.hHis.Aca.Cys_SEt.T.dK.Thr_PO3H2.T.dK.Thr_PO3H2}$$$$`
|
|
|
129
130
|
if (semType) col.semType = semType;
|
|
130
131
|
expect(col.semType, DG.SEMTYPE.MACROMOLECULE);
|
|
131
132
|
|
|
132
|
-
const
|
|
133
|
-
expect(
|
|
134
|
-
expect(
|
|
133
|
+
const sh = SeqHandler.forColumn(col);
|
|
134
|
+
expect(sh.notation, testData.tgt.notation);
|
|
135
|
+
expect(sh.separator === testData.tgt.separator, true);
|
|
135
136
|
|
|
136
|
-
const resSplitted:
|
|
137
|
+
const resSplitted: string[][] = wu.count(0).take(sh.length)
|
|
138
|
+
.map((rowIdx) => wu(sh.getSplitted(rowIdx).originals).toArray()).toArray();
|
|
137
139
|
expectArray(resSplitted, testData.tgt.splitted);
|
|
138
|
-
}
|
|
140
|
+
});
|
|
139
141
|
}
|
|
140
142
|
});
|