@datagrok/bio 2.4.31 → 2.4.39
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.eslintrc.json +6 -8
- package/README.md +22 -7
- package/detectors.js +21 -12
- package/dist/1.js +2 -0
- package/dist/1.js.map +1 -0
- package/dist/18.js +2 -0
- package/dist/18.js.map +1 -0
- package/dist/190.js +2 -0
- package/dist/190.js.map +1 -0
- package/dist/452.js +2 -0
- package/dist/452.js.map +1 -0
- package/dist/729.js +2 -0
- package/dist/729.js.map +1 -0
- package/dist/package-test.js +1 -1
- package/dist/package-test.js.map +1 -1
- package/dist/package.js +1 -1
- package/dist/package.js.map +1 -1
- package/files/libraries/broken-lib.sdf +136 -0
- package/files/libraries/group1/mock-lib-3.json +74 -0
- package/files/libraries/mock-lib-2.json +48 -0
- package/files/tests/100_3_clustests.csv +100 -0
- package/files/tests/100_3_clustests_empty_vals.csv +100 -0
- package/files/tests/peptides_motif-with-random_10000.csv +9998 -0
- package/package.json +4 -4
- package/scripts/sequence_generator.py +164 -48
- package/src/analysis/sequence-activity-cliffs.ts +7 -9
- package/src/analysis/sequence-diversity-viewer.ts +8 -3
- package/src/analysis/sequence-search-base-viewer.ts +4 -3
- package/src/analysis/sequence-similarity-viewer.ts +13 -7
- package/src/analysis/sequence-space.ts +15 -12
- package/src/analysis/workers/mm-distance-array-service.ts +48 -0
- package/src/analysis/workers/mm-distance-array-worker.ts +29 -0
- package/src/analysis/workers/mm-distance-worker-creator.ts +6 -9
- package/src/apps/web-logo-app.ts +34 -0
- package/src/calculations/monomerLevelMols.ts +10 -12
- package/src/demo/bio01-similarity-diversity.ts +4 -5
- package/src/demo/bio01a-hierarchical-clustering-and-sequence-space.ts +6 -7
- package/src/demo/bio01b-hierarchical-clustering-and-activity-cliffs.ts +7 -8
- package/src/demo/bio03-atomic-level.ts +1 -4
- package/src/demo/bio05-helm-msa-sequence-space.ts +6 -4
- package/src/demo/utils.ts +3 -4
- package/src/package-test.ts +1 -2
- package/src/package.ts +135 -82
- package/src/seq_align.ts +482 -483
- package/src/substructure-search/substructure-search.ts +3 -3
- package/src/tests/Palettes-test.ts +1 -1
- package/src/tests/WebLogo-positions-test.ts +12 -35
- package/src/tests/_first-tests.ts +1 -1
- package/src/tests/activity-cliffs-tests.ts +10 -7
- package/src/tests/activity-cliffs-utils.ts +6 -5
- package/src/tests/bio-tests.ts +20 -25
- package/src/tests/checkInputColumn-tests.ts +5 -11
- package/src/tests/converters-test.ts +19 -37
- package/src/tests/detectors-benchmark-tests.ts +35 -37
- package/src/tests/detectors-tests.ts +29 -34
- package/src/tests/detectors-weak-and-likely-tests.ts +11 -21
- package/src/tests/fasta-export-tests.ts +3 -3
- package/src/tests/fasta-handler-test.ts +2 -3
- package/src/tests/lib-tests.ts +2 -4
- package/src/tests/mm-distance-tests.ts +25 -17
- package/src/tests/monomer-libraries-tests.ts +1 -1
- package/src/tests/msa-tests.ts +12 -9
- package/src/tests/pepsea-tests.ts +6 -3
- package/src/tests/renderers-test.ts +13 -11
- package/src/tests/sequence-space-test.ts +10 -8
- package/src/tests/sequence-space-utils.ts +6 -4
- package/src/tests/similarity-diversity-tests.ts +47 -61
- package/src/tests/splitters-test.ts +14 -20
- package/src/tests/to-atomic-level-tests.ts +9 -17
- package/src/tests/units-handler-splitted-tests.ts +106 -0
- package/src/tests/units-handler-tests.ts +22 -26
- package/src/tests/utils/sequences-generators.ts +6 -2
- package/src/tests/utils.ts +10 -4
- package/src/tests/viewers.ts +1 -1
- package/src/utils/atomic-works.ts +49 -57
- package/src/utils/cell-renderer.ts +25 -8
- package/src/utils/check-input-column.ts +19 -4
- package/src/utils/constants.ts +3 -3
- package/src/utils/convert.ts +56 -23
- package/src/utils/monomer-lib.ts +83 -64
- package/src/utils/multiple-sequence-alignment-ui.ts +24 -21
- package/src/utils/multiple-sequence-alignment.ts +2 -2
- package/src/utils/pepsea.ts +17 -7
- package/src/utils/save-as-fasta.ts +11 -4
- package/src/utils/ui-utils.ts +1 -1
- package/src/viewers/vd-regions-viewer.ts +21 -22
- package/src/viewers/web-logo-viewer.ts +189 -154
- package/src/widgets/bio-substructure-filter.ts +9 -6
- package/src/widgets/representations.ts +11 -12
- package/tsconfig.json +1 -1
- package/dist/258.js +0 -2
- package/dist/258.js.map +0 -1
- package/dist/457.js +0 -2
- package/dist/457.js.map +0 -1
- package/dist/562.js +0 -2
- package/dist/562.js.map +0 -1
- package/dist/925.js +0 -2
- package/dist/925.js.map +0 -1
- package/src/analysis/workers/mm-distance-worker.ts +0 -16
|
@@ -7,7 +7,6 @@ import {importFasta} from '../package';
|
|
|
7
7
|
import {convertDo} from '../utils/convert';
|
|
8
8
|
import * as C from '../utils/constants';
|
|
9
9
|
import {generateLongSequence, generateManySequences, performanceTest} from './utils/sequences-generators';
|
|
10
|
-
import {errorToConsole} from '@datagrok-libraries/utils/src/to-console';
|
|
11
10
|
import {ALIGNMENT, ALPHABET, NOTATION, TAGS as bioTAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
12
11
|
import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
13
12
|
import {multipleSequenceAlignmentUI} from '../utils/multiple-sequence-alignment-ui';
|
|
@@ -29,35 +28,38 @@ category('renderers', () => {
|
|
|
29
28
|
|
|
30
29
|
test('long sequence performance ', async () => {
|
|
31
30
|
performanceTest(generateLongSequence, 'Long sequences');
|
|
32
|
-
});
|
|
31
|
+
}, {skipReason: 'GROK-13300'});
|
|
33
32
|
|
|
34
33
|
test('many sequence performance', async () => {
|
|
35
34
|
performanceTest(generateManySequences, 'Many sequences');
|
|
36
|
-
});
|
|
35
|
+
}, {skipReason: 'GROK-13300'});
|
|
36
|
+
test('many sequence performance', async () => {
|
|
37
|
+
performanceTest(generateManySequences, 'Many sequences');
|
|
38
|
+
}, {skipReason: 'GROK-13300'});
|
|
37
39
|
|
|
38
40
|
test('rendererMacromoleculeFasta', async () => {
|
|
39
41
|
await _rendererMacromoleculeFasta();
|
|
40
|
-
});
|
|
42
|
+
}, {skipReason: 'GROK-13300'});
|
|
41
43
|
|
|
42
44
|
test('rendererMacromoleculeSeparator', async () => {
|
|
43
45
|
await _rendererMacromoleculeSeparator();
|
|
44
|
-
});
|
|
46
|
+
}, {skipReason: 'GROK-13300'});
|
|
45
47
|
|
|
46
48
|
test('rendererMacromoleculeDifference', async () => {
|
|
47
49
|
await _rendererMacromoleculeDifference();
|
|
48
|
-
});
|
|
50
|
+
}, {skipReason: 'GROK-13300'});
|
|
49
51
|
|
|
50
52
|
test('afterMsa', async () => {
|
|
51
53
|
await _testAfterMsa();
|
|
52
|
-
});
|
|
54
|
+
}, {skipReason: 'GROK-13300'});
|
|
53
55
|
|
|
54
56
|
test('afterConvert', async () => {
|
|
55
57
|
await _testAfterConvert();
|
|
56
|
-
});
|
|
58
|
+
}, {skipReason: 'GROK-13300'});
|
|
57
59
|
|
|
58
60
|
test('selectRendererBySemType', async () => {
|
|
59
61
|
await _selectRendererBySemType();
|
|
60
|
-
});
|
|
62
|
+
}, {skipReason: 'GROK-13300'});
|
|
61
63
|
|
|
62
64
|
test('setRendererManually', async () => {
|
|
63
65
|
await _setRendererManually();
|
|
@@ -156,7 +158,7 @@ category('renderers', () => {
|
|
|
156
158
|
expect(msaSeqCol.getTag(DG.TAGS.CELL_RENDERER), 'sequence');
|
|
157
159
|
|
|
158
160
|
// check newColumn with UnitsHandler constructor
|
|
159
|
-
const
|
|
161
|
+
const _uh: UnitsHandler = UnitsHandler.getOrCreate(msaSeqCol);
|
|
160
162
|
|
|
161
163
|
dfList.push(df);
|
|
162
164
|
tvList.push(tv);
|
|
@@ -184,7 +186,7 @@ category('renderers', () => {
|
|
|
184
186
|
expect(resCellRenderer, 'sequence');
|
|
185
187
|
|
|
186
188
|
// check tgtCol with UnitsHandler constructor
|
|
187
|
-
const
|
|
189
|
+
const _uh: UnitsHandler = UnitsHandler.getOrCreate(tgtCol);
|
|
188
190
|
}
|
|
189
191
|
|
|
190
192
|
async function _selectRendererBySemType() {
|
|
@@ -2,10 +2,10 @@ import * as grok from 'datagrok-api/grok';
|
|
|
2
2
|
import * as ui from 'datagrok-api/ui';
|
|
3
3
|
import * as DG from 'datagrok-api/dg';
|
|
4
4
|
|
|
5
|
-
import {
|
|
5
|
+
import {category, test} from '@datagrok-libraries/utils/src/test';
|
|
6
6
|
import {readDataframe} from './utils';
|
|
7
7
|
import {_testSequenceSpaceReturnsResult} from './sequence-space-utils';
|
|
8
|
-
import {
|
|
8
|
+
import {DimReductionMethods} from '@datagrok-libraries/ml/src/reduce-dimensionality';
|
|
9
9
|
|
|
10
10
|
category('sequenceSpace', async () => {
|
|
11
11
|
let testFastaDf: DG.DataFrame;
|
|
@@ -14,18 +14,20 @@ category('sequenceSpace', async () => {
|
|
|
14
14
|
let testHelmWithEmptyRowsTableView: DG.TableView;
|
|
15
15
|
|
|
16
16
|
test('sequenceSpaceOpens', async () => {
|
|
17
|
-
testFastaDf = await readDataframe(
|
|
17
|
+
testFastaDf = await readDataframe(
|
|
18
|
+
DG.Test.isInBenchmark ? 'test/peptides_motif-with-random_10000.csv' : 'tests/100_3_clustests.csv',
|
|
19
|
+
);
|
|
18
20
|
testFastaTableView = grok.shell.addTableView(testFastaDf);
|
|
19
|
-
await _testSequenceSpaceReturnsResult(testFastaDf, DimReductionMethods.UMAP, '
|
|
21
|
+
await _testSequenceSpaceReturnsResult(testFastaDf, DimReductionMethods.UMAP, 'sequence');
|
|
20
22
|
grok.shell.closeTable(testFastaDf);
|
|
21
23
|
testFastaTableView.close();
|
|
22
|
-
}
|
|
24
|
+
});
|
|
23
25
|
|
|
24
26
|
test('sequenceSpaceWithEmptyRows', async () => {
|
|
25
|
-
testHelmWithEmptyRows = await readDataframe('tests/
|
|
27
|
+
testHelmWithEmptyRows = await readDataframe('tests/100_3_clustests_empty_vals.csv');
|
|
26
28
|
testHelmWithEmptyRowsTableView = grok.shell.addTableView(testHelmWithEmptyRows);
|
|
27
|
-
await _testSequenceSpaceReturnsResult(testHelmWithEmptyRows, DimReductionMethods.UMAP, '
|
|
29
|
+
await _testSequenceSpaceReturnsResult(testHelmWithEmptyRows, DimReductionMethods.UMAP, 'sequence');
|
|
28
30
|
grok.shell.closeTable(testHelmWithEmptyRows);
|
|
29
31
|
testHelmWithEmptyRowsTableView.close();
|
|
30
|
-
}
|
|
32
|
+
});
|
|
31
33
|
});
|
|
@@ -2,10 +2,12 @@ import * as DG from 'datagrok-api/dg';
|
|
|
2
2
|
import * as grok from 'datagrok-api/grok';
|
|
3
3
|
import {expect} from '@datagrok-libraries/utils/src/test';
|
|
4
4
|
import {sequenceSpaceTopMenu} from '../package';
|
|
5
|
-
import {
|
|
6
|
-
import {
|
|
5
|
+
import {MmDistanceFunctionsNames} from '@datagrok-libraries/ml/src/macromolecule-distance-functions';
|
|
6
|
+
import {DimReductionMethods} from '@datagrok-libraries/ml/src/reduce-dimensionality';
|
|
7
7
|
|
|
8
|
-
export async function _testSequenceSpaceReturnsResult(
|
|
8
|
+
export async function _testSequenceSpaceReturnsResult(
|
|
9
|
+
df: DG.DataFrame, algorithm: DimReductionMethods, colName: string,
|
|
10
|
+
) {
|
|
9
11
|
// await grok.data.detectSemanticTypes(df);
|
|
10
12
|
const col: DG.Column = df.getCol(colName);
|
|
11
13
|
const semType: string = await grok.functions.call('Bio:detectMacromolecule', {col: col});
|
|
@@ -14,4 +16,4 @@ export async function _testSequenceSpaceReturnsResult(df: DG.DataFrame, algorith
|
|
|
14
16
|
|
|
15
17
|
const sp = await sequenceSpaceTopMenu(df, df.col(colName)!, algorithm, MmDistanceFunctionsNames.LEVENSHTEIN, true);
|
|
16
18
|
expect(sp != null, true);
|
|
17
|
-
}
|
|
19
|
+
}
|
|
@@ -1,75 +1,73 @@
|
|
|
1
|
-
import {after, before, category, test, expect,
|
|
1
|
+
import {after, before, category, test, expect, awaitCheck} from '@datagrok-libraries/utils/src/test';
|
|
2
2
|
import * as DG from 'datagrok-api/dg';
|
|
3
|
-
import {createTableView
|
|
3
|
+
import {createTableView} from './utils';
|
|
4
4
|
import * as grok from 'datagrok-api/grok';
|
|
5
5
|
import {SequenceSimilarityViewer} from '../analysis/sequence-similarity-viewer';
|
|
6
6
|
|
|
7
|
-
let viewList: DG.ViewBase[];
|
|
8
|
-
let dfList: DG.DataFrame[];
|
|
9
|
-
|
|
10
|
-
|
|
11
7
|
category('similarity/diversity', async () => {
|
|
12
|
-
|
|
13
8
|
before(async () => {
|
|
14
|
-
|
|
15
|
-
dfList = [];
|
|
9
|
+
// grok.shell.closeAll();
|
|
16
10
|
});
|
|
17
11
|
|
|
18
12
|
after(async () => {
|
|
19
|
-
|
|
20
|
-
for (const df of dfList) grok.shell.closeTable(df);
|
|
13
|
+
grok.shell.closeAll();
|
|
21
14
|
});
|
|
22
15
|
|
|
23
|
-
|
|
24
16
|
test('similaritySearchViewer', async () => {
|
|
25
17
|
await _testSimilaritySearchViewer();
|
|
26
18
|
});
|
|
19
|
+
|
|
27
20
|
test('diversitySearchViewer', async () => {
|
|
28
21
|
await _testDiversitySearchViewer();
|
|
29
22
|
});
|
|
30
23
|
});
|
|
31
24
|
|
|
32
25
|
async function _testSimilaritySearchViewer() {
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
'meI/hHis/Aca/Cys_SEt/T/dK/Thr_PO3H2/Aca/Tyr_PO3H2/D-Chg/dV/Phe_ab-dehydro/N/D-Orn/D-aThr//Phe_4Me');
|
|
57
|
-
}
|
|
26
|
+
try {
|
|
27
|
+
const molecules = await createTableView('tests/sample_MSA_data.csv');
|
|
28
|
+
const viewer = molecules.addViewer('Sequence Similarity Search');
|
|
29
|
+
await awaitCheck(() => getSearchViewer(viewer, 'Sequence Similarity Search') !== undefined,
|
|
30
|
+
'Sequence Similarity Search has not been created', 5000);
|
|
31
|
+
const similaritySearchViewer: SequenceSimilarityViewer = getSearchViewer(viewer, 'Sequence Similarity Search');
|
|
32
|
+
await awaitCheck(() => similaritySearchViewer.root.getElementsByClassName('d4-grid').length !== 0,
|
|
33
|
+
'Sequence Similarity Search has not been created', 5000);
|
|
34
|
+
expect(similaritySearchViewer.fingerprint, 'Morgan');
|
|
35
|
+
expect(similaritySearchViewer.distanceMetric, 'Tanimoto');
|
|
36
|
+
expect(similaritySearchViewer.scores!.get(0), DG.FLOAT_NULL);
|
|
37
|
+
expect(similaritySearchViewer.idxs!.get(0), 0);
|
|
38
|
+
expect(similaritySearchViewer.molCol!.get(0),
|
|
39
|
+
'D-Tyr_Et/Tyr_ab-dehydroMe/dV/E/N/D-Orn/D-aThr//Phe_4Me');
|
|
40
|
+
expect(similaritySearchViewer.scores!.get(1), 0.4722222089767456);
|
|
41
|
+
expect(similaritySearchViewer.idxs!.get(1), 11);
|
|
42
|
+
expect(similaritySearchViewer.molCol!.get(1),
|
|
43
|
+
'meI/hHis//Aca/meM/Tyr_ab-dehydroMe/dV/E/N/D-Orn/D-aThr//Phe_4Me');
|
|
44
|
+
molecules.dataFrame.currentRowIdx = 1;
|
|
45
|
+
await awaitCheck(() => similaritySearchViewer.targetMoleculeIdx === 1, 'Target molecule has not been changed', 5000);
|
|
46
|
+
await awaitCheck(() => similaritySearchViewer.molCol!.get(0) ===
|
|
47
|
+
'meI/hHis/Aca/Cys_SEt/T/dK/Thr_PO3H2/Aca/Tyr_PO3H2/D-Chg/dV/Phe_ab-dehydro/N/D-Orn/D-aThr//Phe_4Me',
|
|
48
|
+
'Incorrect first similar molecule', 5000);
|
|
58
49
|
|
|
50
|
+
} finally {
|
|
51
|
+
grok.shell.closeAll();
|
|
52
|
+
}
|
|
53
|
+
}
|
|
59
54
|
|
|
60
55
|
async function _testDiversitySearchViewer() {
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
56
|
+
try {
|
|
57
|
+
const molecules = await createTableView('tests/sample_MSA_data.csv');
|
|
58
|
+
const viewer = molecules.addViewer('Sequence Diversity Search');
|
|
59
|
+
await awaitCheck(() => getSearchViewer(viewer, 'Sequence Diversity Search') !== undefined,
|
|
60
|
+
'Sequence Diversity Search has not been created', 5000);
|
|
61
|
+
const diversitySearchviewer = getSearchViewer(viewer, 'Sequence Diversity Search');
|
|
62
|
+
await awaitCheck(() => diversitySearchviewer.root.getElementsByClassName('d4-grid').length !== 0,
|
|
63
|
+
'Sequence Diversity Search has not been created', 5000);
|
|
64
|
+
expect(diversitySearchviewer.fingerprint, 'Morgan');
|
|
65
|
+
expect(diversitySearchviewer.distanceMetric, 'Tanimoto');
|
|
66
|
+
expect(diversitySearchviewer.initialized, true);
|
|
67
|
+
expect(diversitySearchviewer.renderMolIds.length > 0, true);
|
|
68
|
+
} finally {
|
|
69
|
+
grok.shell.closeAll();
|
|
70
|
+
}
|
|
73
71
|
}
|
|
74
72
|
|
|
75
73
|
function getSearchViewer(viewer: DG.Viewer, name: string) {
|
|
@@ -80,15 +78,3 @@ function getSearchViewer(viewer: DG.Viewer, name: string) {
|
|
|
80
78
|
}
|
|
81
79
|
}
|
|
82
80
|
|
|
83
|
-
async function waitForCompute(viewer: SequenceSimilarityViewer) {
|
|
84
|
-
const t = new Promise((resolve, reject) => {
|
|
85
|
-
viewer.computeCompleted.subscribe(async (_: any) => {
|
|
86
|
-
try {
|
|
87
|
-
resolve(true);
|
|
88
|
-
} catch (error) {
|
|
89
|
-
reject(error);
|
|
90
|
-
}
|
|
91
|
-
});
|
|
92
|
-
});
|
|
93
|
-
await t;
|
|
94
|
-
}
|
|
@@ -2,66 +2,58 @@ import * as grok from 'datagrok-api/grok';
|
|
|
2
2
|
import * as ui from 'datagrok-api/ui';
|
|
3
3
|
import * as DG from 'datagrok-api/dg';
|
|
4
4
|
|
|
5
|
-
import {after, before, category, test, expect, expectArray,
|
|
5
|
+
import {after, before, category, test, expect, expectArray, delay} from '@datagrok-libraries/utils/src/test';
|
|
6
6
|
import * as C from '../utils/constants';
|
|
7
7
|
import {_package, getHelmMonomers} from '../package';
|
|
8
|
-
import {errorToConsole} from '@datagrok-libraries/utils/src/to-console';
|
|
9
8
|
import {TAGS as bioTAGS, splitterAsFasta, splitterAsHelm} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
10
9
|
|
|
11
10
|
|
|
12
|
-
category('splitters', () => {
|
|
13
|
-
let tvList: DG.TableView[];
|
|
14
|
-
let dfList: DG.DataFrame[];
|
|
15
|
-
|
|
11
|
+
category('splitters', async () => {
|
|
16
12
|
before(async () => {
|
|
17
|
-
tvList = [];
|
|
18
|
-
dfList = [];
|
|
19
13
|
});
|
|
20
14
|
|
|
21
15
|
after(async () => {
|
|
22
|
-
dfList.forEach((df: DG.DataFrame) => { grok.shell.closeTable(df); });
|
|
23
|
-
tvList.forEach((tv: DG.TableView) => tv.close());
|
|
24
16
|
});
|
|
25
17
|
|
|
26
|
-
const
|
|
18
|
+
const _helm1 = 'PEPTIDE1{meI.hHis.Aca.N.T.dE.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.E.N.D-Orn.D-aThr.Phe_4Me}$$$';
|
|
27
19
|
|
|
28
|
-
const
|
|
20
|
+
const _helm2 = 'PEPTIDE1{meI.hHis.Hcy.Q.T.W.Q.Phe_4NH2.D-Tyr_Et.Tyr_ab-dehydroMe.dV.E.N.N.meK}$$$';
|
|
29
21
|
|
|
30
22
|
const data: { [key: string]: [string, string[]] } = {
|
|
31
23
|
fastaMulti: [
|
|
32
24
|
'M[MeI]YKETLL[MeF]PKTDFPMRGGL[MeA]',
|
|
33
25
|
['M', 'MeI', 'Y', 'K', 'E', 'T', 'L', 'L', 'MeF', 'P',
|
|
34
|
-
'K', 'T', 'D', 'F', 'P', 'M', 'R', 'G', 'G', 'L', 'MeA']
|
|
26
|
+
'K', 'T', 'D', 'F', 'P', 'M', 'R', 'G', 'G', 'L', 'MeA'],
|
|
35
27
|
],
|
|
36
28
|
helm1: [
|
|
37
29
|
'PEPTIDE1{meI.hHis.Aca.N.T.dE.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.E.N.D-Orn.D-aThr.Phe_4Me}$$$',
|
|
38
30
|
['meI', 'hHis', 'Aca', 'N', 'T', 'dE', 'Thr_PO3H2', 'Aca', 'D-Tyr_Et',
|
|
39
|
-
'Tyr_ab-dehydroMe', 'dV', 'E', 'N', 'D-Orn', 'D-aThr', 'Phe_4Me']
|
|
31
|
+
'Tyr_ab-dehydroMe', 'dV', 'E', 'N', 'D-Orn', 'D-aThr', 'Phe_4Me'],
|
|
40
32
|
],
|
|
41
33
|
helm2: [
|
|
42
34
|
'PEPTIDE1{meI.hHis.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.D-Dap.dV.E.N.pnG.Phe_4Me}$$$',
|
|
43
35
|
['meI', 'hHis', 'Aca', 'N', 'T', 'dK', 'Thr_PO3H2', 'Aca',
|
|
44
|
-
'D-Tyr_Et', 'D-Dap', 'dV', 'E', 'N', 'pnG', 'Phe_4Me']
|
|
36
|
+
'D-Tyr_Et', 'D-Dap', 'dV', 'E', 'N', 'pnG', 'Phe_4Me'],
|
|
45
37
|
],
|
|
46
38
|
// HELM editor dialog returns HELM string with multichar monomer names in square brackets
|
|
47
39
|
helm3: [
|
|
48
40
|
'PEPTIDE1{[meI].[hHis].[Aca].N.T.[dK].[Thr_PO3H2].[Aca].[D-Tyr_Et].[D-Dap].[dV].E.N.[pnG].[Phe_4Me]}$$$',
|
|
49
41
|
['meI', 'hHis', 'Aca', 'N', 'T', 'dK', 'Thr_PO3H2', 'Aca',
|
|
50
|
-
'D-Tyr_Et', 'D-Dap', 'dV', 'E', 'N', 'pnG', 'Phe_4Me']
|
|
42
|
+
'D-Tyr_Et', 'D-Dap', 'dV', 'E', 'N', 'pnG', 'Phe_4Me'],
|
|
51
43
|
],
|
|
52
44
|
|
|
53
45
|
testHelm1: [
|
|
54
46
|
'RNA1{R(U)P.R(T)P.R(G)P.R(C)P.R(A)}$$$$',
|
|
55
|
-
['R(U)P', 'R(T)P', 'R(G)P', 'R(C)P', 'R(A)']
|
|
47
|
+
['R(U)P', 'R(T)P', 'R(G)P', 'R(C)P', 'R(A)'],
|
|
56
48
|
],
|
|
57
49
|
|
|
58
50
|
testHelm2: [
|
|
59
51
|
'RNA1{P.R(U)P.R(T)}$$$$',
|
|
60
|
-
['P', 'R(U)P', 'R(T)']
|
|
52
|
+
['P', 'R(U)P', 'R(T)'],
|
|
61
53
|
],
|
|
62
54
|
testHelm3: [
|
|
63
55
|
'RNA1{P.R(U).P.R(T)}$$$$',
|
|
64
|
-
['P', 'R(U)', 'P', 'R(T)']
|
|
56
|
+
['P', 'R(U)', 'P', 'R(T)'],
|
|
65
57
|
],
|
|
66
58
|
};
|
|
67
59
|
|
|
@@ -85,7 +77,8 @@ category('splitters', () => {
|
|
|
85
77
|
seqCol.semType = semType;
|
|
86
78
|
seqCol.setTag(bioTAGS.aligned, C.MSA);
|
|
87
79
|
|
|
88
|
-
const
|
|
80
|
+
const _tv: DG.TableView = grok.shell.addTableView(df);
|
|
81
|
+
await delay(500); // needed to account for table adding
|
|
89
82
|
// call to calculate 'cell.renderer' tag
|
|
90
83
|
await grok.data.detectSemanticTypes(df);
|
|
91
84
|
|
|
@@ -99,6 +92,7 @@ category('splitters', () => {
|
|
|
99
92
|
PEPTIDE1{hHis.N.T}$$$,5.30751
|
|
100
93
|
PEPTIDE1{hHis.Aca.Cys_SEt}$$$,5.72388
|
|
101
94
|
`);
|
|
95
|
+
await grok.data.detectSemanticTypes(df);
|
|
102
96
|
const expectedMonomerList = ['hHis', 'Aca', 'Cys_SEt', 'N', 'T'];
|
|
103
97
|
|
|
104
98
|
const helmCol: DG.Column = df.getCol('HELM');
|
|
@@ -95,44 +95,36 @@ category('toAtomicLevel', async () => {
|
|
|
95
95
|
[csvTests.fastaDna]: `seq
|
|
96
96
|
ACGTC
|
|
97
97
|
CAGTGT
|
|
98
|
-
TTCAAC
|
|
99
|
-
`,
|
|
98
|
+
TTCAAC`,
|
|
100
99
|
[csvTests.fastaRna]: `seq
|
|
101
100
|
ACGUC
|
|
102
101
|
CAGUGU
|
|
103
|
-
UUCAAC
|
|
104
|
-
`,
|
|
102
|
+
UUCAAC`,
|
|
105
103
|
[csvTests.fastaPt]: `seq
|
|
106
104
|
FWPHEY
|
|
107
105
|
YNRQWYV
|
|
108
|
-
MKPSEYV
|
|
109
|
-
`,
|
|
106
|
+
MKPSEYV`,
|
|
110
107
|
[csvTests.separatorDna]: `seq
|
|
111
108
|
A/C/G/T/C
|
|
112
109
|
C/A/G/T/G/T
|
|
113
|
-
T/T/C/A/A/C
|
|
114
|
-
`,
|
|
110
|
+
T/T/C/A/A/C`,
|
|
115
111
|
[csvTests.separatorRna]: `seq
|
|
116
112
|
A*C*G*U*C
|
|
117
113
|
C*A*G*U*G*U
|
|
118
|
-
U*U*C*A*A*C
|
|
119
|
-
`,
|
|
114
|
+
U*U*C*A*A*C`,
|
|
120
115
|
[csvTests.separatorPt]: `seq
|
|
121
116
|
F-W-P-H-E-Y
|
|
122
117
|
Y-N-R-Q-W-Y-V
|
|
123
|
-
M-K-P-S-E-Y-V
|
|
124
|
-
`,
|
|
118
|
+
M-K-P-S-E-Y-V`,
|
|
125
119
|
[csvTests.separatorUn]: `seq
|
|
126
120
|
meI-hHis-Aca-N-T-dE-Thr_PO3H2-Aca-D
|
|
127
121
|
meI-hHis-Aca-Cys_SEt-T-dK-Thr_PO3H2-Aca-Tyr_PO3H2
|
|
128
|
-
Lys_Boc-hHis-Aca-Cys_SEt-T-dK-Thr_PO3H2-Aca-Tyr_PO3H2
|
|
129
|
-
`,
|
|
122
|
+
Lys_Boc-hHis-Aca-Cys_SEt-T-dK-Thr_PO3H2-Aca-Tyr_PO3H2`,
|
|
130
123
|
|
|
131
124
|
[csvTests.helm]: `seq
|
|
132
125
|
PEPTIDE1{meI.D-gGlu.Aca.N.T.dE.Thr_PO3H2.Aca.D}$$$
|
|
133
126
|
PEPTIDE1{meI.hHis.Aca.Cys_SEt.T.dK.Thr_PO3H2.Aca.Tyr_PO3H2}$$$
|
|
134
|
-
PEPTIDE1{Lys_Boc.hHis.Aca.Cys_SEt.T.dK.Thr_PO3H2.Aca.Tyr_PO3H2}
|
|
135
|
-
`,
|
|
127
|
+
PEPTIDE1{Lys_Boc.hHis.Aca.Cys_SEt.T.dK.Thr_PO3H2.Aca.Tyr_PO3H2}$$$`,
|
|
136
128
|
};
|
|
137
129
|
|
|
138
130
|
/** Also detects semantic types
|
|
@@ -183,5 +175,5 @@ PEPTIDE1{Lys_Boc.hHis.Aca.Cys_SEt.T.dK.Thr_PO3H2.Aca.Tyr_PO3H2}$$$
|
|
|
183
175
|
async function _testToAtomicLevel(df: DG.DataFrame, seqColName: string = 'seq', monomerLibHelper: IMonomerLibHelper) {
|
|
184
176
|
const seqCol: DG.Column<string> = df.getCol(seqColName);
|
|
185
177
|
const monomerLib: IMonomerLib = monomerLibHelper.getBioLib();
|
|
186
|
-
const
|
|
178
|
+
const _resCol = await _toAtomicLevel(df, seqCol, monomerLib);
|
|
187
179
|
}
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
import * as grok from 'datagrok-api/grok';
|
|
2
|
+
import * as ui from 'datagrok-api/ui';
|
|
3
|
+
import * as DG from 'datagrok-api/dg';
|
|
4
|
+
|
|
5
|
+
import {category, test, expect, expectArray} from '@datagrok-libraries/utils/src/test';
|
|
6
|
+
import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
7
|
+
|
|
8
|
+
category('UnitsHandler', () => {
|
|
9
|
+
const data: { [testName: string]: { src: { csv: string }, tgt: { splitted: string[][] } } } = {
|
|
10
|
+
fasta: {
|
|
11
|
+
src: {
|
|
12
|
+
csv: `seq
|
|
13
|
+
ACGTC
|
|
14
|
+
CAGTGT
|
|
15
|
+
TTCAAC`
|
|
16
|
+
},
|
|
17
|
+
tgt: {
|
|
18
|
+
splitted: [
|
|
19
|
+
['A', 'C', 'G', 'T', 'C'],
|
|
20
|
+
['C', 'A', 'G', 'T', 'G', 'T'],
|
|
21
|
+
['T', 'T', 'C', 'A', 'A', 'C']
|
|
22
|
+
]
|
|
23
|
+
}
|
|
24
|
+
},
|
|
25
|
+
fastaMsa: {
|
|
26
|
+
src: {
|
|
27
|
+
csv: `seq
|
|
28
|
+
AC-GT-CT
|
|
29
|
+
CAC-T-GT
|
|
30
|
+
ACCGTACT`,
|
|
31
|
+
},
|
|
32
|
+
tgt: {
|
|
33
|
+
splitted: [
|
|
34
|
+
//@formatter:off
|
|
35
|
+
['A', 'C', '' , 'G', 'T', '' , 'C', 'T'],
|
|
36
|
+
['C', 'A', 'C', '' , 'T', '' , 'G', 'T'],
|
|
37
|
+
['A', 'C', 'C', 'G', 'T', 'A', 'C', 'T'],
|
|
38
|
+
//@formatter:on
|
|
39
|
+
]
|
|
40
|
+
}
|
|
41
|
+
},
|
|
42
|
+
separator: {
|
|
43
|
+
src: {
|
|
44
|
+
csv: `seq
|
|
45
|
+
abc-dfgg-abc1-cfr3-rty-wert
|
|
46
|
+
rut12-her2-rty-wert-abc-abc1-dfgg
|
|
47
|
+
rut12-rty-her2-abc-cfr3-wert-rut12`,
|
|
48
|
+
},
|
|
49
|
+
tgt: {
|
|
50
|
+
splitted: [
|
|
51
|
+
['abc', 'dfgg', 'abc1', 'cfr3', 'rty', 'wert'],
|
|
52
|
+
['rut12', 'her2', 'rty', 'wert', 'abc', 'abc1', 'dfgg'],
|
|
53
|
+
['rut12', 'rty', 'her2', 'abc', 'cfr3', 'wert', 'rut12']
|
|
54
|
+
]
|
|
55
|
+
}
|
|
56
|
+
},
|
|
57
|
+
|
|
58
|
+
separatorMsa: {
|
|
59
|
+
src: {
|
|
60
|
+
csv: `seq
|
|
61
|
+
abc-dfgg-abc1-cfr3-rty-wert
|
|
62
|
+
rut12-her2-rty--abc1-dfgg
|
|
63
|
+
rut12-rty-her2---wert`
|
|
64
|
+
},
|
|
65
|
+
tgt: {
|
|
66
|
+
splitted: [
|
|
67
|
+
['abc', 'dfgg', 'abc1', 'cfr3', 'rty', 'wert'],
|
|
68
|
+
['rut12', 'her2', 'rty', '', 'abc1', 'dfgg'],
|
|
69
|
+
['rut12', 'rty', 'her2', '', '', 'wert'],
|
|
70
|
+
]
|
|
71
|
+
}
|
|
72
|
+
},
|
|
73
|
+
helm: {
|
|
74
|
+
src: {
|
|
75
|
+
csv: `seq
|
|
76
|
+
PEPTIDE1{meI.hHis.Aca.N.T.dE.Thr_PO3H2.Aca.D-Tyr_Et}$$$$
|
|
77
|
+
PEPTIDE1{meI.hHis.Aca.Cys_SEt.T.dK.Thr_PO3H2.Aca}$$$$
|
|
78
|
+
PEPTIDE1{Lys_Boc.hHis.Aca.Cys_SEt.T.dK.Thr_PO3H2.Aca}$$$$
|
|
79
|
+
PEPTIDE1{meI.hHis.Aca.Cys_SEt.T.dK.Thr_PO3H2}$$$$`
|
|
80
|
+
},
|
|
81
|
+
tgt: {
|
|
82
|
+
splitted: [
|
|
83
|
+
['meI', 'hHis', 'Aca', 'N', 'T', 'dE', 'Thr_PO3H2', 'Aca', 'D-Tyr_Et'],
|
|
84
|
+
['meI', 'hHis', 'Aca', 'Cys_SEt', 'T', 'dK', 'Thr_PO3H2', 'Aca'],
|
|
85
|
+
['Lys_Boc', 'hHis', 'Aca', 'Cys_SEt', 'T', 'dK', 'Thr_PO3H2', 'Aca'],
|
|
86
|
+
['meI', 'hHis', 'Aca', 'Cys_SEt', 'T', 'dK', 'Thr_PO3H2'],
|
|
87
|
+
]
|
|
88
|
+
}
|
|
89
|
+
}
|
|
90
|
+
};
|
|
91
|
+
|
|
92
|
+
for (const [testName, testData] of Object.entries(data)) {
|
|
93
|
+
test(`splitted-${testName}`, async () => {
|
|
94
|
+
const df: DG.DataFrame = DG.DataFrame.fromCsv(testData.src.csv);
|
|
95
|
+
const col: DG.Column = df.getCol('seq');
|
|
96
|
+
|
|
97
|
+
const semType = await grok.functions.call('Bio:detectMacromolecule', {col: col});
|
|
98
|
+
if (semType) col.semType = semType;
|
|
99
|
+
expect(col.semType, DG.SEMTYPE.MACROMOLECULE);
|
|
100
|
+
|
|
101
|
+
const uh = UnitsHandler.getOrCreate(col);
|
|
102
|
+
const splitted: string[][] = uh.splitted;
|
|
103
|
+
expectArray(splitted, testData.tgt.splitted);
|
|
104
|
+
});
|
|
105
|
+
}
|
|
106
|
+
});
|
|
@@ -2,75 +2,71 @@ import * as grok from 'datagrok-api/grok';
|
|
|
2
2
|
import * as ui from 'datagrok-api/ui';
|
|
3
3
|
import * as DG from 'datagrok-api/dg';
|
|
4
4
|
|
|
5
|
-
import {
|
|
5
|
+
import {category, test, expect} from '@datagrok-libraries/utils/src/test';
|
|
6
6
|
import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
7
7
|
import {ALPHABET, NOTATION, TAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
8
8
|
|
|
9
9
|
const seqDna = `seq
|
|
10
10
|
ACGTC
|
|
11
11
|
CAGTGT
|
|
12
|
-
TTCAAC
|
|
13
|
-
`;
|
|
12
|
+
TTCAAC`;
|
|
14
13
|
|
|
15
14
|
const seqDnaMsa = `seq
|
|
16
15
|
AC-GT-CT
|
|
17
16
|
CAC-T-GT
|
|
18
|
-
ACCGTACT
|
|
19
|
-
`;
|
|
17
|
+
ACCGTACT`;
|
|
20
18
|
|
|
21
19
|
const seqUn = `seq
|
|
22
20
|
abc-dfgg-abc1-cfr3-rty-wert
|
|
23
21
|
rut12-her2-rty-wert-abc-abc1-dfgg
|
|
24
|
-
rut12-rty-her2-abc-cfr3-wert-rut12
|
|
25
|
-
`;
|
|
22
|
+
rut12-rty-her2-abc-cfr3-wert-rut12`;
|
|
26
23
|
|
|
27
24
|
const seqHelm = `seq
|
|
28
25
|
PEPTIDE1{meI.hHis.Aca.N.T.dE.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.E.N.D-Orn.D-aThr.Phe_4Me}$$$$
|
|
29
26
|
PEPTIDE1{meI.hHis.Aca.Cys_SEt.T.dK.Thr_PO3H2.Aca.Tyr_PO3H2.D-Chg.dV.Phe_ab-dehydro.N.D-Orn.D-aThr.Phe_4Me}$$$$
|
|
30
27
|
PEPTIDE1{Lys_Boc.hHis.Aca.Cys_SEt.T.dK.Thr_PO3H2.Aca.Tyr_PO3H2.D-Chg.dV.Thr_PO3H2.N.D-Orn.D-aThr.Phe_4Me}$$$$
|
|
31
|
-
PEPTIDE1{meI.hHis.Aca.Cys_SEt.T.dK.Thr_PO3H2.Aca.Tyr_PO3H2.D-Chg.dV.Thr_PO3H2.N.D-Orn.D-aThr.Phe_4Me}
|
|
32
|
-
`;
|
|
28
|
+
PEPTIDE1{meI.hHis.Aca.Cys_SEt.T.dK.Thr_PO3H2.Aca.Tyr_PO3H2.D-Chg.dV.Thr_PO3H2.N.D-Orn.D-aThr.Phe_4Me}$$$$`;
|
|
33
29
|
|
|
34
|
-
category('UnitsHandler', () =>{
|
|
35
|
-
test('Seq-Fasta', async () =>{
|
|
36
|
-
const [
|
|
30
|
+
category('UnitsHandler', () => {
|
|
31
|
+
test('Seq-Fasta', async () => {
|
|
32
|
+
const [_df, uh] = await loadCsvWithDetection(seqDna);
|
|
37
33
|
expect(uh.notation, NOTATION.FASTA);
|
|
38
34
|
expect(uh.isMsa(), false);
|
|
39
35
|
});
|
|
40
36
|
|
|
41
|
-
test('Seq-Fasta-MSA', async () =>{
|
|
42
|
-
const [
|
|
37
|
+
test('Seq-Fasta-MSA', async () => {
|
|
38
|
+
const [_df, uh] = await loadCsvWithDetection(seqDnaMsa);
|
|
43
39
|
expect(uh.notation, NOTATION.FASTA);
|
|
44
40
|
expect(uh.isMsa(), true);
|
|
45
41
|
});
|
|
46
42
|
|
|
47
|
-
test('Seq-Fasta-units', async () =>{
|
|
48
|
-
const [
|
|
43
|
+
test('Seq-Fasta-units', async () => {
|
|
44
|
+
const [_df, uh] = await loadCsvWithTag(seqDna, DG.TAGS.UNITS, NOTATION.FASTA);
|
|
49
45
|
expect(uh.notation, NOTATION.FASTA);
|
|
50
46
|
expect(uh.isMsa(), false);
|
|
51
47
|
});
|
|
52
48
|
|
|
53
|
-
test('Seq-Fasta-MSA-units', async () =>{
|
|
54
|
-
const [
|
|
49
|
+
test('Seq-Fasta-MSA-units', async () => {
|
|
50
|
+
const [_df, uh] = await loadCsvWithTag(seqDnaMsa, DG.TAGS.UNITS, NOTATION.FASTA);
|
|
55
51
|
expect(uh.notation, NOTATION.FASTA);
|
|
56
52
|
expect(uh.isMsa(), true);
|
|
57
53
|
});
|
|
58
54
|
|
|
59
|
-
test('Seq-Helm', async () =>{
|
|
60
|
-
const [
|
|
55
|
+
test('Seq-Helm', async () => {
|
|
56
|
+
const [_df, uh] = await loadCsvWithTag(seqHelm, DG.TAGS.UNITS, NOTATION.HELM);
|
|
61
57
|
expect(uh.notation, NOTATION.HELM);
|
|
62
58
|
expect(uh.isHelm(), true);
|
|
63
59
|
});
|
|
64
60
|
|
|
65
|
-
test('Seq-UN', async () =>{
|
|
66
|
-
const [
|
|
61
|
+
test('Seq-UN', async () => {
|
|
62
|
+
const [_df, uh] = await loadCsvWithTag(seqUn, DG.TAGS.UNITS, NOTATION.SEPARATOR);
|
|
67
63
|
expect(uh.notation, NOTATION.SEPARATOR);
|
|
68
64
|
expect(uh.separator, '-');
|
|
69
65
|
expect(uh.alphabet, ALPHABET.UN);
|
|
70
66
|
});
|
|
71
67
|
|
|
72
|
-
test('Seq-UN-auto', async () =>{
|
|
73
|
-
const [
|
|
68
|
+
test('Seq-UN-auto', async () => {
|
|
69
|
+
const [_df, uh] = await loadCsvWithDetection(seqUn);
|
|
74
70
|
expect(uh.notation, NOTATION.SEPARATOR);
|
|
75
71
|
expect(uh.separator, '-');
|
|
76
72
|
expect(uh.alphabet, ALPHABET.UN);
|
|
@@ -79,7 +75,7 @@ category('UnitsHandler', () =>{
|
|
|
79
75
|
async function loadCsvWithDetection(csv: string): Promise<[df: DG.DataFrame, uh: UnitsHandler]> {
|
|
80
76
|
const df = DG.DataFrame.fromCsv(csv);
|
|
81
77
|
await grok.data.detectSemanticTypes(df);
|
|
82
|
-
const uh =
|
|
78
|
+
const uh = UnitsHandler.getOrCreate(df.getCol('seq'));
|
|
83
79
|
return [df, uh];
|
|
84
80
|
}
|
|
85
81
|
|
|
@@ -91,7 +87,7 @@ category('UnitsHandler', () =>{
|
|
|
91
87
|
col.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
92
88
|
if (value === NOTATION.SEPARATOR)
|
|
93
89
|
col.setTag(TAGS.separator, '-');
|
|
94
|
-
const uh =
|
|
90
|
+
const uh = UnitsHandler.getOrCreate(df.getCol('seq'));
|
|
95
91
|
return [df, uh];
|
|
96
92
|
}
|
|
97
93
|
});
|