@datagrok/bio 2.11.42 → 2.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +6 -0
- package/README.md +1 -1
- package/detectors.js +11 -11
- package/dist/36.js +1 -1
- package/dist/36.js.map +1 -1
- package/dist/413.js +1 -1
- package/dist/413.js.map +1 -1
- package/dist/590.js +1 -1
- package/dist/590.js.map +1 -1
- package/dist/709.js +1 -1
- package/dist/709.js.map +1 -1
- package/dist/895.js +1 -1
- package/dist/895.js.map +1 -1
- package/dist/package-test.js +2 -2
- package/dist/package-test.js.map +1 -1
- package/dist/package.js +2 -2
- package/dist/package.js.map +1 -1
- package/package.json +10 -10
- package/src/analysis/sequence-activity-cliffs.ts +9 -9
- package/src/analysis/sequence-diversity-viewer.ts +3 -3
- package/src/analysis/sequence-search-base-viewer.ts +2 -2
- package/src/analysis/sequence-similarity-viewer.ts +10 -10
- package/src/analysis/sequence-space.ts +26 -23
- package/src/calculations/monomerLevelMols.ts +13 -11
- package/src/package.ts +8 -8
- package/src/tests/WebLogo-layout-tests.ts +5 -2
- package/src/tests/WebLogo-positions-test.ts +5 -5
- package/src/tests/bio-tests.ts +13 -6
- package/src/tests/converters-test.ts +4 -4
- package/src/tests/detectors-benchmark-tests.ts +5 -5
- package/src/tests/detectors-tests.ts +13 -13
- package/src/tests/fasta-export-tests.ts +10 -4
- package/src/tests/mm-distance-tests.ts +10 -10
- package/src/tests/msa-tests.ts +8 -15
- package/src/tests/renderers-monomer-placer.ts +3 -3
- package/src/tests/renderers-test.ts +6 -8
- package/src/tests/splitters-test.ts +14 -13
- package/src/tests/to-atomic-level-tests.ts +2 -2
- package/src/tests/units-handler-get-region.ts +4 -4
- package/src/tests/units-handler-splitted-tests.ts +19 -17
- package/src/tests/units-handler-tests.ts +32 -32
- package/src/utils/cell-renderer.ts +40 -34
- package/src/utils/check-input-column.ts +5 -5
- package/src/utils/context-menu.ts +9 -6
- package/src/utils/convert.ts +9 -9
- package/src/utils/get-region-func-editor.ts +11 -11
- package/src/utils/get-region.ts +10 -12
- package/src/utils/macromolecule-column-widget.ts +4 -3
- package/src/utils/monomer-lib/library-file-manager/event-manager.ts +1 -1
- package/src/utils/multiple-sequence-alignment-ui.ts +6 -6
- package/src/utils/pepsea.ts +1 -0
- package/src/utils/poly-tool/transformation.ts +3 -3
- package/src/utils/save-as-fasta.ts +14 -15
- package/src/utils/sequence-to-mol.ts +4 -4
- package/src/viewers/web-logo-viewer.ts +46 -54
- package/src/widgets/bio-substructure-filter.ts +3 -3
- package/src/widgets/composition-analysis-widget.ts +8 -8
|
@@ -4,7 +4,8 @@ import * as grok from 'datagrok-api/grok';
|
|
|
4
4
|
|
|
5
5
|
import {category, expect, expectArray, test} from '@datagrok-libraries/utils/src/test';
|
|
6
6
|
import {saveAsFastaDo, wrapSequence} from '../utils/save-as-fasta';
|
|
7
|
-
import {splitterAsFasta} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
7
|
+
import {NOTATION, splitterAsFasta} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
8
|
+
import {SeqHandler} from '@datagrok-libraries/bio/src/utils/seq-handler';
|
|
8
9
|
|
|
9
10
|
type SaveAsFastaTestArgs = { srcCsv: string, idCols: string [], seqCol: string, lineWidth: number, tgtFasta: string };
|
|
10
11
|
|
|
@@ -87,10 +88,13 @@ MRGGL
|
|
|
87
88
|
});
|
|
88
89
|
|
|
89
90
|
function _testWrapSequence(testKey: string, lineWidth: number = 10) {
|
|
90
|
-
const splitter = splitterAsFasta;
|
|
91
|
-
|
|
92
91
|
const srcSeq: string = wrapData[testKey].src;
|
|
93
|
-
const
|
|
92
|
+
const col = DG.Column.fromStrings('src', [srcSeq]);
|
|
93
|
+
col.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
94
|
+
col.setTag(DG.TAGS.UNITS, NOTATION.FASTA);
|
|
95
|
+
const sh = SeqHandler.forColumn(col);
|
|
96
|
+
const srcSS = sh.getSplitted(0);
|
|
97
|
+
const wrapRes: string[] = wrapSequence(srcSS, lineWidth);
|
|
94
98
|
const wrapTgt: string[] = wrapData[testKey].tgt;
|
|
95
99
|
|
|
96
100
|
expectArray(wrapRes, wrapTgt);
|
|
@@ -100,6 +104,8 @@ MRGGL
|
|
|
100
104
|
const df: DG.DataFrame = DG.DataFrame.fromCsv(args.srcCsv);
|
|
101
105
|
|
|
102
106
|
const seqCol: DG.Column = df.getCol(args.seqCol);
|
|
107
|
+
seqCol.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
108
|
+
seqCol.setTag(DG.TAGS.UNITS, NOTATION.FASTA);
|
|
103
109
|
const idCols: DG.Column[] = args.idCols.map((colName) => df.getCol(colName));
|
|
104
110
|
|
|
105
111
|
const fastaRes: string = saveAsFastaDo(idCols, seqCol, args.lineWidth);
|
|
@@ -3,7 +3,7 @@ import * as ui from 'datagrok-api/ui';
|
|
|
3
3
|
import * as DG from 'datagrok-api/dg';
|
|
4
4
|
|
|
5
5
|
import {category, expect, test} from '@datagrok-libraries/utils/src/test';
|
|
6
|
-
import {
|
|
6
|
+
import {SeqHandler} from '@datagrok-libraries/bio/src/utils/seq-handler';
|
|
7
7
|
import {MmDistanceFunctionsNames, mmDistanceFunctions}
|
|
8
8
|
from '@datagrok-libraries/ml/src/macromolecule-distance-functions';
|
|
9
9
|
|
|
@@ -42,20 +42,20 @@ ATCGAATCGA
|
|
|
42
42
|
ATCGAATCGA`;
|
|
43
43
|
|
|
44
44
|
test('protein-distance-function', async () => {
|
|
45
|
-
const
|
|
46
|
-
const distFunc =
|
|
45
|
+
const sh = await _initMacromoleculeColumn(protTable);
|
|
46
|
+
const distFunc = sh.getDistanceFunctionName();
|
|
47
47
|
expect(distFunc, MmDistanceFunctionsNames.LEVENSHTEIN);
|
|
48
48
|
});
|
|
49
49
|
|
|
50
50
|
test('DNA-distance-function', async () => {
|
|
51
|
-
const
|
|
52
|
-
const distFunc =
|
|
51
|
+
const sh = await _initMacromoleculeColumn(DNATable);
|
|
52
|
+
const distFunc = sh.getDistanceFunctionName();
|
|
53
53
|
expect(distFunc, MmDistanceFunctionsNames.LEVENSHTEIN);
|
|
54
54
|
});
|
|
55
55
|
|
|
56
56
|
test('MSA-distance-function', async () => {
|
|
57
|
-
const
|
|
58
|
-
const distFunc =
|
|
57
|
+
const sh = await _initMacromoleculeColumn(MSATable);
|
|
58
|
+
const distFunc = sh.getDistanceFunctionName();
|
|
59
59
|
expect(distFunc, MmDistanceFunctionsNames.HAMMING);
|
|
60
60
|
});
|
|
61
61
|
|
|
@@ -125,7 +125,7 @@ ATCGAATCGA`;
|
|
|
125
125
|
});
|
|
126
126
|
});
|
|
127
127
|
|
|
128
|
-
async function _initMacromoleculeColumn(csv: string): Promise<
|
|
128
|
+
async function _initMacromoleculeColumn(csv: string): Promise<SeqHandler> {
|
|
129
129
|
const srcDf: DG.DataFrame = DG.DataFrame.fromCsv(csv);
|
|
130
130
|
const seqCol = srcDf.col('seq')!;
|
|
131
131
|
const semType: string = await grok.functions
|
|
@@ -133,8 +133,8 @@ async function _initMacromoleculeColumn(csv: string): Promise<UnitsHandler> {
|
|
|
133
133
|
if (semType)
|
|
134
134
|
seqCol.semType = semType;
|
|
135
135
|
await grok.data.detectSemanticTypes(srcDf);
|
|
136
|
-
const
|
|
137
|
-
return
|
|
136
|
+
const sh = SeqHandler.forColumn(seqCol);
|
|
137
|
+
return sh;
|
|
138
138
|
}
|
|
139
139
|
|
|
140
140
|
function _testDistance(seq1: string, seq2: string, df: (a: string, b: string) => number, expected: number) {
|
package/src/tests/msa-tests.ts
CHANGED
|
@@ -103,13 +103,11 @@ MWRSWYCKHPMWRSWYCKHPMWRSWYCKHPMWRSWYCKHPMWRSWYCKHPMWRSWYCKHPMWRSWYCKHPMWRSWYCKHP
|
|
|
103
103
|
|
|
104
104
|
async function _testMsaIsCorrect(srcCsv: string, tgtCsv: string): Promise<void> {
|
|
105
105
|
const srcDf: DG.DataFrame = DG.DataFrame.fromCsv(srcCsv);
|
|
106
|
+
await grok.data.detectSemanticTypes(srcDf);
|
|
106
107
|
const tgtDf: DG.DataFrame = DG.DataFrame.fromCsv(tgtCsv);
|
|
107
108
|
|
|
108
109
|
const srcCol: DG.Column = srcDf.getCol('seq')!;
|
|
109
|
-
|
|
110
|
-
.call('Bio:detectMacromolecule', {col: srcCol}) as unknown as string;
|
|
111
|
-
if (semType)
|
|
112
|
-
srcCol.semType = semType;
|
|
110
|
+
expect(srcCol.semType, DG.SEMTYPE.MACROMOLECULE);
|
|
113
111
|
|
|
114
112
|
const tgtCol: DG.Column = tgtDf.getCol('seq')!;
|
|
115
113
|
const resCol: DG.Column = await runKalign(srcCol, true);
|
|
@@ -121,23 +119,18 @@ async function _testMSAOnColumn(
|
|
|
121
119
|
srcNotation: NOTATION, tgtNotation: NOTATION, alphabet?: ALPHABET, pepseaMethod?: string,
|
|
122
120
|
): Promise<void> {
|
|
123
121
|
const srcDf: DG.DataFrame = DG.DataFrame.fromCsv(srcCsv);
|
|
122
|
+
await grok.data.detectSemanticTypes(srcDf);
|
|
124
123
|
const tgtDf: DG.DataFrame = DG.DataFrame.fromCsv(tgtCsv);
|
|
125
124
|
|
|
126
|
-
const srcSeqCol = srcDf.getCol('seq')!;
|
|
127
125
|
const tgtCol = tgtDf.getCol('seq')!;
|
|
128
126
|
const srcCol: DG.Column = srcDf.getCol('seq')!;
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
if (semType)
|
|
132
|
-
srcCol.semType = semType;
|
|
133
|
-
|
|
134
|
-
await grok.data.detectSemanticTypes(srcDf);
|
|
135
|
-
expect(srcSeqCol.semType, DG.SEMTYPE.MACROMOLECULE);
|
|
136
|
-
expect(srcSeqCol.getTag(DG.TAGS.UNITS), srcNotation);
|
|
127
|
+
expect(srcCol.semType, DG.SEMTYPE.MACROMOLECULE);
|
|
128
|
+
expect(srcCol.getTag(DG.TAGS.UNITS), srcNotation);
|
|
137
129
|
if (alphabet)
|
|
138
|
-
expect(
|
|
130
|
+
expect(srcCol.getTag(bioTAGS.alphabet), alphabet);
|
|
139
131
|
|
|
140
|
-
const msaSeqCol = await multipleSequenceAlignmentUI({col:
|
|
132
|
+
const msaSeqCol = await multipleSequenceAlignmentUI({col: srcCol, pepsea: {method: pepseaMethod}});
|
|
133
|
+
expect(msaSeqCol.semType, DG.SEMTYPE.MACROMOLECULE);
|
|
141
134
|
expect(msaSeqCol.semType, DG.SEMTYPE.MACROMOLECULE);
|
|
142
135
|
expect(msaSeqCol.getTag(DG.TAGS.UNITS), tgtNotation);
|
|
143
136
|
expect(msaSeqCol.getTag(bioTAGS.aligned), ALIGNMENT.SEQ_MSA);
|
|
@@ -6,7 +6,7 @@ import wu from 'wu';
|
|
|
6
6
|
import {category, test} from '@datagrok-libraries/utils/src/test';
|
|
7
7
|
import {MonomerPlacer} from '@datagrok-libraries/bio/src/utils/cell-renderer-monomer-placer';
|
|
8
8
|
import {monomerToShort} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
9
|
-
import {
|
|
9
|
+
import {SeqHandler} from '@datagrok-libraries/bio/src/utils/seq-handler';
|
|
10
10
|
|
|
11
11
|
import {_package} from '../package-test';
|
|
12
12
|
|
|
@@ -93,9 +93,9 @@ id3,QHIRE--LT
|
|
|
93
93
|
const charWidth: number = 7;
|
|
94
94
|
const sepWidth: number = 12;
|
|
95
95
|
const colTemp: MonomerPlacer = new MonomerPlacer(null, seqCol, () => {
|
|
96
|
-
const
|
|
96
|
+
const sh = SeqHandler.forColumn(seqCol);
|
|
97
97
|
return {
|
|
98
|
-
|
|
98
|
+
seqHandler: sh,
|
|
99
99
|
monomerCharWidth: charWidth,
|
|
100
100
|
separatorWidth: sepWidth,
|
|
101
101
|
monomerToShort: monomerToShort,
|
|
@@ -1,11 +1,9 @@
|
|
|
1
1
|
import * as grok from 'datagrok-api/grok';
|
|
2
2
|
import * as DG from 'datagrok-api/dg';
|
|
3
3
|
|
|
4
|
-
import
|
|
5
|
-
|
|
6
|
-
import {category, expect, test, awaitCheck, delay} from '@datagrok-libraries/utils/src/test';
|
|
4
|
+
import {category, expect, test, delay} from '@datagrok-libraries/utils/src/test';
|
|
7
5
|
import {ALIGNMENT, ALPHABET, NOTATION, TAGS as bioTAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
8
|
-
import {
|
|
6
|
+
import {SeqHandler} from '@datagrok-libraries/bio/src/utils/seq-handler';
|
|
9
7
|
|
|
10
8
|
import {importFasta} from '../package';
|
|
11
9
|
import {convertDo} from '../utils/convert';
|
|
@@ -148,8 +146,8 @@ category('renderers', () => {
|
|
|
148
146
|
expect(msaSeqCol.getTag(bioTAGS.alphabet), ALPHABET.PT);
|
|
149
147
|
expect(msaSeqCol.getTag(DG.TAGS.CELL_RENDERER), 'sequence');
|
|
150
148
|
|
|
151
|
-
// check newColumn with
|
|
152
|
-
const
|
|
149
|
+
// check newColumn with SeqHandler constructor
|
|
150
|
+
const _sh: SeqHandler = SeqHandler.forColumn(msaSeqCol);
|
|
153
151
|
}
|
|
154
152
|
|
|
155
153
|
async function _testAfterConvert() {
|
|
@@ -172,8 +170,8 @@ category('renderers', () => {
|
|
|
172
170
|
const resCellRenderer = tgtCol.getTag(DG.TAGS.CELL_RENDERER);
|
|
173
171
|
expect(resCellRenderer, 'sequence');
|
|
174
172
|
|
|
175
|
-
// check tgtCol with
|
|
176
|
-
const
|
|
173
|
+
// check tgtCol with SeqHandler constructor
|
|
174
|
+
const _sh: SeqHandler = SeqHandler.forColumn(tgtCol);
|
|
177
175
|
}
|
|
178
176
|
|
|
179
177
|
async function _selectRendererBySemType() {
|
|
@@ -2,22 +2,23 @@ import * as grok from 'datagrok-api/grok';
|
|
|
2
2
|
import * as ui from 'datagrok-api/ui';
|
|
3
3
|
import * as DG from 'datagrok-api/dg';
|
|
4
4
|
|
|
5
|
+
import wu from 'wu';
|
|
6
|
+
|
|
5
7
|
import {
|
|
6
|
-
after, before, category, test, expect, expectArray
|
|
8
|
+
after, before, category, test, expect, expectArray
|
|
7
9
|
} from '@datagrok-libraries/utils/src/test';
|
|
8
|
-
import {
|
|
9
|
-
TAGS as bioTAGS,
|
|
10
|
-
splitterAsFasta,
|
|
11
|
-
splitterAsHelm,
|
|
12
|
-
NOTATION
|
|
13
|
-
} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
10
|
+
import {TAGS as bioTAGS, splitterAsFasta} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
14
11
|
|
|
15
12
|
import {splitToMonomersUI} from '../utils/split-to-monomers';
|
|
16
13
|
import {awaitGrid} from './utils';
|
|
17
14
|
import * as C from '../utils/constants';
|
|
18
|
-
|
|
19
15
|
import {getHelmMonomers} from '../package';
|
|
20
16
|
|
|
17
|
+
import {splitterAsHelm} from '@datagrok-libraries/bio/src/utils/macromolecule/utils';
|
|
18
|
+
import {ISeqSplitted} from '@datagrok-libraries/bio/src/utils/macromolecule/types';
|
|
19
|
+
|
|
20
|
+
import {_package} from '../package-test';
|
|
21
|
+
|
|
21
22
|
category('splitters', async () => {
|
|
22
23
|
before(async () => {
|
|
23
24
|
});
|
|
@@ -132,7 +133,7 @@ PEPTIDE1{hHis.Aca.Cys_SEt}$$$,5.72388
|
|
|
132
133
|
});
|
|
133
134
|
|
|
134
135
|
// test('helmAsFasta', async () => {
|
|
135
|
-
// // The columns can't be empty for
|
|
136
|
+
// // The columns can't be empty for SeqHandler
|
|
136
137
|
// /* eslint-disable max-len */
|
|
137
138
|
// const srcSeq = '[meI][Pip][dK][Thr_PO3H2][L-hArg(Et,Et)][D-Tyr_Et][Tyr_ab-dehydroMe][dV]EN[D-Orn][D-aThr][Phe_4Me]';
|
|
138
139
|
// const tgtSeqA = ['meI', 'Pip', 'dK', 'Thr_PO3H2', 'L-hArg(Et,Et)', 'D-Tyr_Et', 'Tyr_ab-dehydroMe', 'dV', 'E', 'N', 'D-Orn', 'D-aThr', 'Phe_4Me'];
|
|
@@ -143,13 +144,13 @@ PEPTIDE1{hHis.Aca.Cys_SEt}$$$,5.72388
|
|
|
143
144
|
});
|
|
144
145
|
|
|
145
146
|
export async function _testFastaSplitter(src: string, tgt: string[]) {
|
|
146
|
-
const res:
|
|
147
|
+
const res: ISeqSplitted = splitterAsFasta(src);
|
|
147
148
|
console.debug(`Bio: tests: splitters: src=${JSON.stringify(src)}, res=${JSON.stringify(res)} .`);
|
|
148
|
-
expectArray(res, tgt);
|
|
149
|
+
expectArray(wu(res.originals).toArray(), tgt);
|
|
149
150
|
}
|
|
150
151
|
|
|
151
152
|
export async function _testHelmSplitter(src: string, tgt: string[]) {
|
|
152
|
-
const res:
|
|
153
|
+
const res: ISeqSplitted = splitterAsHelm(src);
|
|
153
154
|
console.debug(`Bio: tests: splitters: src=${JSON.stringify(src)}, res=${JSON.stringify(res)} .`);
|
|
154
|
-
expectArray(res, tgt);
|
|
155
|
+
expectArray(wu(res.originals).toArray(), tgt);
|
|
155
156
|
}
|
|
@@ -14,7 +14,7 @@ import {
|
|
|
14
14
|
getUserLibSettings, setUserLibSettings, setUserLibSettingsForTests
|
|
15
15
|
} from '@datagrok-libraries/bio/src/monomer-works/lib-settings';
|
|
16
16
|
import {UserLibSettings} from '@datagrok-libraries/bio/src/monomer-works/types';
|
|
17
|
-
import {
|
|
17
|
+
import {SeqHandler} from '@datagrok-libraries/bio/src/utils/seq-handler';
|
|
18
18
|
|
|
19
19
|
import {toAtomicLevel} from '../package';
|
|
20
20
|
import {_package} from '../package-test';
|
|
@@ -198,7 +198,7 @@ PEPTIDE1{Lys_Boc.hHis.Aca.Cys_SEt.T.dK.Thr_PO3H2.Aca.Tyr_PO3H2.Thr_PO3H2.Aca.Tyr
|
|
|
198
198
|
seqCol.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
199
199
|
seqCol.setTag(DG.TAGS.UNITS, NOTATION.FASTA);
|
|
200
200
|
seqCol.setTag(bioTAGS.alphabet, ALPHABET.PT);
|
|
201
|
-
const
|
|
201
|
+
const sh = SeqHandler.forColumn(seqCol);
|
|
202
202
|
const resCol = (await _testToAtomicLevel(srcDf, 'seq', monomerLibHelper))!;
|
|
203
203
|
expect(polishMolfile(resCol.get(0)), polishMolfile(tgtMol));
|
|
204
204
|
});
|
|
@@ -2,10 +2,10 @@ import * as grok from 'datagrok-api/grok';
|
|
|
2
2
|
import * as DG from 'datagrok-api/dg';
|
|
3
3
|
|
|
4
4
|
import {category, expect, expectArray, test} from '@datagrok-libraries/utils/src/test';
|
|
5
|
-
import {
|
|
5
|
+
import {SeqHandler} from '@datagrok-libraries/bio/src/utils/seq-handler';
|
|
6
6
|
import {ALPHABET, NOTATION, TAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
7
7
|
|
|
8
|
-
category('
|
|
8
|
+
category('SeqHandler: getRegion', () => {
|
|
9
9
|
const data: {
|
|
10
10
|
[testName: string]: {
|
|
11
11
|
srcCsv: string,
|
|
@@ -76,8 +76,8 @@ PEPTIDE1{[Cys_SEt].T.*.*}$$$$`,
|
|
|
76
76
|
const semType: string | null = await grok.functions.call('Bio:detectMacromolecule', {col: srcSeqCol});
|
|
77
77
|
if (semType) srcSeqCol.semType = semType;
|
|
78
78
|
|
|
79
|
-
const
|
|
80
|
-
const resSeqCol =
|
|
79
|
+
const srcSh = SeqHandler.forColumn(srcSeqCol);
|
|
80
|
+
const resSeqCol = srcSh.getRegion(testData.startIdx, testData.endIdx, 'regSeq');
|
|
81
81
|
|
|
82
82
|
const tgtDf = DG.DataFrame.fromCsv(testData.tgtCsv);
|
|
83
83
|
const tgtSeqCol = tgtDf.getCol('seq');
|
|
@@ -1,10 +1,11 @@
|
|
|
1
1
|
import * as grok from 'datagrok-api/grok';
|
|
2
2
|
import * as DG from 'datagrok-api/dg';
|
|
3
3
|
|
|
4
|
+
import wu from 'wu';
|
|
5
|
+
|
|
4
6
|
import {category, expect, expectArray, test} from '@datagrok-libraries/utils/src/test';
|
|
5
|
-
import {
|
|
7
|
+
import {GapOriginals, SeqHandler} from '@datagrok-libraries/bio/src/utils/seq-handler';
|
|
6
8
|
import {NOTATION} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
7
|
-
import {ISeqSplitted} from '@datagrok-libraries/bio/src/utils/macromolecule/types';
|
|
8
9
|
|
|
9
10
|
enum Tests {
|
|
10
11
|
fasta = 'fasta',
|
|
@@ -14,10 +15,10 @@ enum Tests {
|
|
|
14
15
|
helm = 'helm',
|
|
15
16
|
}
|
|
16
17
|
|
|
17
|
-
category('
|
|
18
|
-
const fG =
|
|
19
|
-
const hG =
|
|
20
|
-
const sG =
|
|
18
|
+
category('SeqHandler', () => {
|
|
19
|
+
const fG = GapOriginals[NOTATION.FASTA];
|
|
20
|
+
const hG = GapOriginals[NOTATION.HELM];
|
|
21
|
+
const sG = GapOriginals[NOTATION.SEPARATOR];
|
|
21
22
|
const data: {
|
|
22
23
|
[testName: string]: {
|
|
23
24
|
src: { csv: string },
|
|
@@ -34,9 +35,9 @@ TTCAACTTCAAC`
|
|
|
34
35
|
tgt: {
|
|
35
36
|
notation: NOTATION.FASTA,
|
|
36
37
|
splitted: [
|
|
37
|
-
'
|
|
38
|
-
'
|
|
39
|
-
'
|
|
38
|
+
['A', 'C', 'G', 'T', 'C', 'A', 'C', 'G', 'T', 'C'],
|
|
39
|
+
['C', 'A', 'G', 'T', 'G', 'T', 'C', 'A', 'G', 'T', 'G', 'T'],
|
|
40
|
+
['T', 'T', 'C', 'A', 'A', 'C', 'T', 'T', 'C', 'A', 'A', 'C'],
|
|
40
41
|
]
|
|
41
42
|
}
|
|
42
43
|
},
|
|
@@ -51,9 +52,9 @@ ACCGTACTACCGTACT`,
|
|
|
51
52
|
notation: NOTATION.FASTA,
|
|
52
53
|
splitted: [
|
|
53
54
|
//@formatter:off
|
|
54
|
-
'
|
|
55
|
-
'
|
|
56
|
-
'
|
|
55
|
+
['A', 'C', '-', 'G', 'T', '-', 'C', 'T', 'A', 'C', '-', 'G', 'T', '-', 'C', 'T'],
|
|
56
|
+
['C', 'A', 'C', '-', 'T', '-', 'G', 'T', 'C', 'A', 'C', '-', 'T', '-', 'G', 'T'],
|
|
57
|
+
['A', 'C', 'C', 'G', 'T', 'A', 'C', 'T', 'A', 'C', 'C', 'G', 'T', 'A', 'C', 'T'],
|
|
57
58
|
//@formatter:on
|
|
58
59
|
]
|
|
59
60
|
}
|
|
@@ -129,12 +130,13 @@ PEPTIDE1{meI.hHis.Aca.Cys_SEt.T.dK.Thr_PO3H2.T.dK.Thr_PO3H2}$$$$`
|
|
|
129
130
|
if (semType) col.semType = semType;
|
|
130
131
|
expect(col.semType, DG.SEMTYPE.MACROMOLECULE);
|
|
131
132
|
|
|
132
|
-
const
|
|
133
|
-
expect(
|
|
134
|
-
expect(
|
|
133
|
+
const sh = SeqHandler.forColumn(col);
|
|
134
|
+
expect(sh.notation, testData.tgt.notation);
|
|
135
|
+
expect(sh.separator === testData.tgt.separator, true);
|
|
135
136
|
|
|
136
|
-
const resSplitted:
|
|
137
|
+
const resSplitted: string[][] = wu.count(0).take(sh.length)
|
|
138
|
+
.map((rowIdx) => wu(sh.getSplitted(rowIdx).originals).toArray()).toArray();
|
|
137
139
|
expectArray(resSplitted, testData.tgt.splitted);
|
|
138
|
-
}
|
|
140
|
+
});
|
|
139
141
|
}
|
|
140
142
|
});
|
|
@@ -3,8 +3,8 @@ import * as ui from 'datagrok-api/ui';
|
|
|
3
3
|
import * as DG from 'datagrok-api/dg';
|
|
4
4
|
|
|
5
5
|
import {category, test, expect} from '@datagrok-libraries/utils/src/test';
|
|
6
|
-
import {
|
|
7
|
-
import {ALPHABET, NOTATION
|
|
6
|
+
import {SeqHandler} from '@datagrok-libraries/bio/src/utils/seq-handler';
|
|
7
|
+
import {ALPHABET, NOTATION} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
8
8
|
|
|
9
9
|
const seqDna = `seq
|
|
10
10
|
ACGTCACGTC
|
|
@@ -27,67 +27,67 @@ PEPTIDE1{meI.hHis.Aca.Cys_SEt.T.dK.Thr_PO3H2.Aca.Tyr_PO3H2.D-Chg.dV.Phe_ab-dehyd
|
|
|
27
27
|
PEPTIDE1{Lys_Boc.hHis.Aca.Cys_SEt.T.dK.Thr_PO3H2.Aca.Tyr_PO3H2.D-Chg.dV.Thr_PO3H2.N.D-Orn.D-aThr.Phe_4Me}$$$$
|
|
28
28
|
PEPTIDE1{meI.hHis.Aca.Cys_SEt.T.dK.Thr_PO3H2.Aca.Tyr_PO3H2.D-Chg.dV.Thr_PO3H2.N.D-Orn.D-aThr.Phe_4Me}$$$$`;
|
|
29
29
|
|
|
30
|
-
category('
|
|
30
|
+
category('SeqHandler', () => {
|
|
31
31
|
test('Seq-Fasta', async () => {
|
|
32
|
-
const [_df,
|
|
33
|
-
expect(
|
|
34
|
-
expect(
|
|
32
|
+
const [_df, sh] = await loadCsvWithDetection(seqDna);
|
|
33
|
+
expect(sh.notation, NOTATION.FASTA);
|
|
34
|
+
expect(sh.isMsa(), false);
|
|
35
35
|
});
|
|
36
36
|
|
|
37
37
|
test('Seq-Fasta-MSA', async () => {
|
|
38
|
-
const [_df,
|
|
39
|
-
expect(
|
|
40
|
-
expect(
|
|
38
|
+
const [_df, sh] = await loadCsvWithDetection(seqDnaMsa);
|
|
39
|
+
expect(sh.notation, NOTATION.FASTA);
|
|
40
|
+
expect(sh.isMsa(), true);
|
|
41
41
|
});
|
|
42
42
|
|
|
43
43
|
test('Seq-Fasta-units', async () => {
|
|
44
|
-
const [_df,
|
|
45
|
-
expect(
|
|
46
|
-
expect(
|
|
44
|
+
const [_df, sh] = await loadCsvWithDetection(seqDna);
|
|
45
|
+
expect(sh.notation, NOTATION.FASTA);
|
|
46
|
+
expect(sh.isMsa(), false);
|
|
47
47
|
});
|
|
48
48
|
|
|
49
49
|
test('Seq-Fasta-MSA-units', async () => {
|
|
50
|
-
const [_df,
|
|
51
|
-
expect(
|
|
52
|
-
expect(
|
|
50
|
+
const [_df, sh] = await loadCsvWithDetection(seqDnaMsa);
|
|
51
|
+
expect(sh.notation, NOTATION.FASTA);
|
|
52
|
+
expect(sh.isMsa(), true);
|
|
53
53
|
});
|
|
54
54
|
|
|
55
55
|
test('Seq-Helm', async () => {
|
|
56
|
-
const [_df,
|
|
57
|
-
expect(
|
|
58
|
-
expect(
|
|
56
|
+
const [_df, sh] = await loadCsvWithDetection(seqHelm);
|
|
57
|
+
expect(sh.notation, NOTATION.HELM);
|
|
58
|
+
expect(sh.isHelm(), true);
|
|
59
59
|
});
|
|
60
60
|
|
|
61
61
|
test('Seq-UN', async () => {
|
|
62
|
-
const [_df,
|
|
63
|
-
expect(
|
|
64
|
-
expect(
|
|
65
|
-
expect(
|
|
62
|
+
const [_df, sh] = await loadCsvWithDetection(seqUn);
|
|
63
|
+
expect(sh.notation, NOTATION.SEPARATOR);
|
|
64
|
+
expect(sh.separator, '-');
|
|
65
|
+
expect(sh.alphabet, ALPHABET.UN);
|
|
66
66
|
});
|
|
67
67
|
|
|
68
68
|
test('Seq-UN-auto', async () => {
|
|
69
|
-
const [_df,
|
|
70
|
-
expect(
|
|
71
|
-
expect(
|
|
72
|
-
expect(
|
|
69
|
+
const [_df, sh] = await loadCsvWithDetection(seqUn);
|
|
70
|
+
expect(sh.notation, NOTATION.SEPARATOR);
|
|
71
|
+
expect(sh.separator, '-');
|
|
72
|
+
expect(sh.alphabet, ALPHABET.UN);
|
|
73
73
|
});
|
|
74
74
|
|
|
75
|
-
async function loadCsvWithDetection(csv: string): Promise<[df: DG.DataFrame,
|
|
75
|
+
async function loadCsvWithDetection(csv: string): Promise<[df: DG.DataFrame, sh: SeqHandler]> {
|
|
76
76
|
const df = DG.DataFrame.fromCsv(csv);
|
|
77
77
|
await grok.data.detectSemanticTypes(df);
|
|
78
|
-
const
|
|
79
|
-
return [df,
|
|
78
|
+
const sh = SeqHandler.forColumn(df.getCol('seq'));
|
|
79
|
+
return [df, sh];
|
|
80
80
|
}
|
|
81
81
|
|
|
82
82
|
// async function loadCsvWithTag(csv: string, tag: string, value: string):
|
|
83
|
-
// Promise<[df: DG.DataFrame, uh:
|
|
83
|
+
// Promise<[df: DG.DataFrame, uh: SeqHandler]> {
|
|
84
84
|
// const df = DG.DataFrame.fromCsv(csv);
|
|
85
85
|
// const col = df.getCol('seq');
|
|
86
86
|
// col.setTag(tag, value);
|
|
87
87
|
// col.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
88
88
|
// if (value === NOTATION.SEPARATOR)
|
|
89
89
|
// col.setTag(TAGS.separator, '-');
|
|
90
|
-
// const
|
|
91
|
-
// return [df,
|
|
90
|
+
// const sh = SeqHandler.forColumn(df.getCol('seq'));
|
|
91
|
+
// return [df, sh];
|
|
92
92
|
// }
|
|
93
93
|
});
|