@datagrok/bio 2.1.12 → 2.4.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.eslintrc.json +1 -1
- package/README.md +11 -12
- package/css/helm.css +10 -0
- package/detectors.js +97 -69
- package/dist/package-test.js +2 -13168
- package/dist/package-test.js.map +1 -0
- package/dist/package.js +2 -10560
- package/dist/package.js.map +1 -0
- package/dockerfiles/Dockerfile +86 -0
- package/files/icons/composition-analysis.svg +17 -0
- package/files/icons/sequence-diversity-viewer.svg +4 -0
- package/files/icons/sequence-similarity-viewer.svg +4 -0
- package/files/icons/vdregions-viewer.svg +22 -0
- package/files/icons/weblogo-viewer.svg +7 -0
- package/files/tests/testUrl.csv +11 -0
- package/files/tests/toAtomicLevelTest.csv +4 -0
- package/package.json +24 -25
- package/src/analysis/sequence-activity-cliffs.ts +11 -9
- package/src/analysis/sequence-search-base-viewer.ts +2 -1
- package/src/analysis/sequence-similarity-viewer.ts +3 -3
- package/src/analysis/sequence-space.ts +2 -1
- package/src/calculations/monomerLevelMols.ts +4 -4
- package/src/package-test.ts +10 -2
- package/src/package.ts +215 -131
- package/src/substructure-search/substructure-search.ts +19 -16
- package/src/tests/Palettes-test.ts +1 -1
- package/src/tests/WebLogo-positions-test.ts +113 -57
- package/src/tests/_first-tests.ts +9 -0
- package/src/tests/activity-cliffs-tests.ts +8 -7
- package/src/tests/activity-cliffs-utils.ts +17 -9
- package/src/tests/bio-tests.ts +4 -5
- package/src/tests/checkInputColumn-tests.ts +1 -1
- package/src/tests/converters-test.ts +52 -17
- package/src/tests/detectors-benchmark-tests.ts +3 -2
- package/src/tests/detectors-tests.ts +177 -172
- package/src/tests/detectors-weak-and-likely-tests.ts +129 -0
- package/src/tests/fasta-export-tests.ts +1 -1
- package/src/tests/monomer-libraries-tests.ts +34 -0
- package/src/tests/pepsea-tests.ts +21 -0
- package/src/tests/renderers-test.ts +21 -19
- package/src/tests/sequence-space-test.ts +6 -4
- package/src/tests/similarity-diversity-tests.ts +4 -4
- package/src/tests/splitters-test.ts +4 -5
- package/src/tests/substructure-filters-tests.ts +23 -1
- package/src/tests/utils/sequences-generators.ts +1 -1
- package/src/tests/utils.ts +2 -1
- package/src/tests/viewers.ts +16 -0
- package/src/utils/cell-renderer.ts +88 -35
- package/src/utils/constants.ts +7 -6
- package/src/utils/convert.ts +8 -2
- package/src/utils/monomer-lib.ts +174 -0
- package/src/utils/multiple-sequence-alignment.ts +44 -20
- package/src/utils/pepsea.ts +78 -0
- package/src/utils/save-as-fasta.ts +2 -1
- package/src/utils/ui-utils.ts +15 -3
- package/src/viewers/vd-regions-viewer.ts +113 -72
- package/src/viewers/web-logo-viewer.ts +1031 -0
- package/src/widgets/bio-substructure-filter.ts +38 -24
- package/tsconfig.json +71 -72
- package/webpack.config.js +4 -11
- package/dist/vendors-node_modules_datagrok-libraries_ml_src_workers_dimensionality-reducer_js.js +0 -9039
|
@@ -4,7 +4,7 @@ import * as grok from 'datagrok-api/grok';
|
|
|
4
4
|
|
|
5
5
|
import {category, expect, expectArray, test} from '@datagrok-libraries/utils/src/test';
|
|
6
6
|
import {saveAsFastaDo, wrapSequence} from '../utils/save-as-fasta';
|
|
7
|
-
import {splitterAsFasta} from '@datagrok-libraries/bio';
|
|
7
|
+
import {splitterAsFasta} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
8
8
|
|
|
9
9
|
type SaveAsFastaTestArgs = { srcCsv: string, idCols: string [], seqCol: string, lineWidth: number, tgtFasta: string };
|
|
10
10
|
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
import * as grok from 'datagrok-api/grok';
|
|
2
|
+
import * as DG from 'datagrok-api/dg';
|
|
3
|
+
import * as ui from 'datagrok-api/ui';
|
|
4
|
+
|
|
5
|
+
import {test, after, before, category, expect} from '@datagrok-libraries/utils/src/test';
|
|
6
|
+
|
|
7
|
+
import {getMonomerLibHelper, IMonomerLibHelper} from '@datagrok-libraries/bio/src/monomer-works/monomer-utils';
|
|
8
|
+
import {LIB_STORAGE_NAME} from '../utils/monomer-lib';
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
/**
 * Tests for monomer libraries loading.
 *
 * The user's monomer libraries settings are backed up before the tests and restored afterwards,
 * because the tests overwrite them in the user data storage.
 */
category('monomerLibraries', () => {
  let monomerLibHelper: IMonomerLibHelper;
  /** Backup actual user's monomer libraries settings */
  let userLibrariesSettings: any = null;

  before(async () => {
    monomerLibHelper = await getMonomerLibHelper();
    userLibrariesSettings = await grok.dapi.userDataStorage.get(LIB_STORAGE_NAME, true);
  });

  after(async () => {
    // Put back the settings saved in before()
    await grok.dapi.userDataStorage.put(LIB_STORAGE_NAME, userLibrariesSettings, true);
    // The helper still holds the libraries loaded from the cleared settings of the 'default' test;
    // reload so that it reflects the restored user settings again.
    await monomerLibHelper.loadLibraries(true); // load user settings libraries
  });

  test('default', async () => {
    // Clear settings to test default
    await grok.dapi.userDataStorage.put(LIB_STORAGE_NAME, {}, true);
    await monomerLibHelper.loadLibraries(true); // test defaultLib

    // Currently default monomer lib is empty
    const currentMonomerLib = monomerLibHelper.getBioLib();
    expect(currentMonomerLib.getTypes().length, 0);
  });
});
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
import * as DG from 'datagrok-api/dg';
|
|
2
|
+
|
|
3
|
+
import {category, expect, test} from '@datagrok-libraries/utils/src/test';
|
|
4
|
+
import {runPepsea} from '../utils/pepsea';
|
|
5
|
+
|
|
6
|
+
/**
 * Tests for PepSeA alignment.
 *
 * The fixture holds HELM sequences in the 'HELM' column and the expected alignment result
 * for each of them in the 'MSA' column.
 */
category('PepSeA', () => {
  const testCsv = `HELM,MSA
"PEPTIDE1{F.L.R.G.W.[MeF].Y.S.N.N.C}$$$$","F.L.R.G.W.MeF.Y..S.N.N.C"
"PEPTIDE1{F.L.R.G.Y.[MeF].Y.W.N.C}$$$$","F.L.R.G.Y.MeF.Y.W...N.C"
"PEPTIDE1{F.G.Y.[MeF].Y.W.S.D.N.C}$$$$","F...G.Y.MeF.Y.W.S.D.N.C"
"PEPTIDE1{F.L.R.G.Y.[MeF].Y.W.S.N.D.C}$$$$","F.L.R.G.Y.MeF.Y.W.S.N.D.C"
"PEPTIDE1{F.V.R.G.Y.[MeF].Y.W.S.N.C}$$$$","F.V.R.G.Y.MeF.Y.W.S..N.C"`;

  test('Basic alignment', async () => {
    const table = DG.DataFrame.fromCsv(testCsv);
    const alignedCol = await runPepsea(table.getCol('HELM'), 'msa(HELM)');
    const alignedTestCol = table.getCol('MSA');

    // Guard against a vacuous pass: the loop below is bounded by the result length,
    // so an empty or truncated result column would otherwise check nothing.
    expect(alignedCol.length, alignedTestCol.length);

    // Pass actual and expected values separately, so a failure reports the mismatching sequences
    for (let i = 0; i < alignedCol.length; ++i)
      expect(alignedCol.get(i), alignedTestCol.get(i));
  }, {skipReason: 'GROK-12764'});
});
|
|
@@ -5,10 +5,11 @@ import {after, before, category, delay, expect, test} from '@datagrok-libraries/
|
|
|
5
5
|
|
|
6
6
|
import {importFasta, multipleSequenceAlignmentAny} from '../package';
|
|
7
7
|
import {convertDo} from '../utils/convert';
|
|
8
|
-
import
|
|
8
|
+
import * as C from '../utils/constants';
|
|
9
9
|
import {generateLongSequence, generateManySequences, performanceTest} from './utils/sequences-generators';
|
|
10
10
|
import {errorToConsole} from '@datagrok-libraries/utils/src/to-console';
|
|
11
|
-
import {ALIGNMENT, ALPHABET, NOTATION, TAGS as bioTAGS
|
|
11
|
+
import {ALIGNMENT, ALPHABET, NOTATION, TAGS as bioTAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
12
|
+
import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
12
13
|
|
|
13
14
|
category('renderers', () => {
|
|
14
15
|
let tvList: DG.TableView[];
|
|
@@ -20,8 +21,9 @@ category('renderers', () => {
|
|
|
20
21
|
});
|
|
21
22
|
|
|
22
23
|
after(async () => {
|
|
23
|
-
|
|
24
|
-
|
|
24
|
+
// Closing views and data frames leads to an exception
|
|
25
|
+
// dfList.forEach((df: DG.DataFrame) => { grok.shell.closeTable(df); });
|
|
26
|
+
// tvList.forEach((tv: DG.TableView) => tv.close());
|
|
25
27
|
});
|
|
26
28
|
|
|
27
29
|
test('long sequence performance ', async () => {
|
|
@@ -50,7 +52,7 @@ category('renderers', () => {
|
|
|
50
52
|
|
|
51
53
|
test('afterConvert', async () => {
|
|
52
54
|
await _testAfterConvert();
|
|
53
|
-
});
|
|
55
|
+
}, {skipReason: 'GROK-12765'});
|
|
54
56
|
|
|
55
57
|
test('selectRendererBySemType', async () => {
|
|
56
58
|
await _selectRendererBySemType();
|
|
@@ -104,8 +106,8 @@ category('renderers', () => {
|
|
|
104
106
|
const seqDiffCol: DG.Column = DG.Column.fromStrings('SequencesDiff',
|
|
105
107
|
['meI/hHis/Aca/N/T/dK/Thr_PO3H2/Aca#D-Tyr_Et/Tyr_ab-dehydroMe/meN/E/N/dV']);
|
|
106
108
|
seqDiffCol.tags[DG.TAGS.UNITS] = NOTATION.SEPARATOR;
|
|
107
|
-
seqDiffCol.tags[
|
|
108
|
-
seqDiffCol.semType = SEM_TYPES.MACROMOLECULE_DIFFERENCE;
|
|
109
|
+
seqDiffCol.tags[bioTAGS.separator] = '/';
|
|
110
|
+
seqDiffCol.semType = C.SEM_TYPES.MACROMOLECULE_DIFFERENCE;
|
|
109
111
|
const df = DG.DataFrame.fromColumns([seqDiffCol]);
|
|
110
112
|
|
|
111
113
|
const tv: DG.TableView = grok.shell.addTableView(df);
|
|
@@ -116,7 +118,7 @@ category('renderers', () => {
|
|
|
116
118
|
tvList.push(tv);
|
|
117
119
|
|
|
118
120
|
const resCellRenderer = seqDiffCol.getTag(DG.TAGS.CELL_RENDERER);
|
|
119
|
-
expect(resCellRenderer,
|
|
121
|
+
expect(resCellRenderer, C.SEM_TYPES.MACROMOLECULE_DIFFERENCE);
|
|
120
122
|
}
|
|
121
123
|
|
|
122
124
|
async function _testAfterMsa() {
|
|
@@ -143,17 +145,17 @@ category('renderers', () => {
|
|
|
143
145
|
expect(srcSeqCol.getTag(bioTAGS.alphabet), ALPHABET.PT);
|
|
144
146
|
expect(srcSeqCol.getTag(DG.TAGS.CELL_RENDERER), 'sequence');
|
|
145
147
|
|
|
146
|
-
const msaSeqCol
|
|
148
|
+
const msaSeqCol = multipleSequenceAlignmentAny(srcSeqCol);
|
|
147
149
|
tv.grid.invalidate();
|
|
148
150
|
|
|
149
|
-
expect(msaSeqCol.semType, DG.SEMTYPE.MACROMOLECULE);
|
|
150
|
-
expect(msaSeqCol.getTag(DG.TAGS.UNITS), NOTATION.FASTA);
|
|
151
|
-
expect(msaSeqCol.getTag(bioTAGS.aligned), ALIGNMENT.SEQ_MSA);
|
|
152
|
-
expect(msaSeqCol.getTag(bioTAGS.alphabet), ALPHABET.PT);
|
|
153
|
-
expect(msaSeqCol.getTag(DG.TAGS.CELL_RENDERER), 'sequence');
|
|
151
|
+
// expect(msaSeqCol.semType, DG.SEMTYPE.MACROMOLECULE);
|
|
152
|
+
// expect(msaSeqCol.getTag(DG.TAGS.UNITS), NOTATION.FASTA);
|
|
153
|
+
// expect(msaSeqCol.getTag(bioTAGS.aligned), ALIGNMENT.SEQ_MSA);
|
|
154
|
+
// expect(msaSeqCol.getTag(bioTAGS.alphabet), ALPHABET.PT);
|
|
155
|
+
// expect(msaSeqCol.getTag(DG.TAGS.CELL_RENDERER), 'sequence');
|
|
154
156
|
|
|
155
157
|
// check newColumn with UnitsHandler constructor
|
|
156
|
-
const uh: UnitsHandler = new UnitsHandler(msaSeqCol);
|
|
158
|
+
// const uh: UnitsHandler = new UnitsHandler(msaSeqCol);
|
|
157
159
|
|
|
158
160
|
dfList.push(df);
|
|
159
161
|
tvList.push(tv);
|
|
@@ -191,8 +193,8 @@ category('renderers', () => {
|
|
|
191
193
|
const seqDiffCol: DG.Column = DG.Column.fromStrings('SequencesDiff',
|
|
192
194
|
['meI/hHis/Aca/N/T/dK/Thr_PO3H2/Aca#D-Tyr_Et/Tyr_ab-dehydroMe/meN/E/N/dV']);
|
|
193
195
|
seqDiffCol.tags[DG.TAGS.UNITS] = NOTATION.SEPARATOR;
|
|
194
|
-
seqDiffCol.tags[
|
|
195
|
-
seqDiffCol.semType = SEM_TYPES.MACROMOLECULE_DIFFERENCE;
|
|
196
|
+
seqDiffCol.tags[bioTAGS.separator] = '/';
|
|
197
|
+
seqDiffCol.semType = C.SEM_TYPES.MACROMOLECULE_DIFFERENCE;
|
|
196
198
|
const df = DG.DataFrame.fromColumns([seqDiffCol]);
|
|
197
199
|
const tv = grok.shell.addTableView(df);
|
|
198
200
|
dfList.push(df);
|
|
@@ -213,8 +215,8 @@ category('renderers', () => {
|
|
|
213
215
|
const seqDiffCol: DG.Column = DG.Column.fromStrings('SequencesDiff',
|
|
214
216
|
['meI/hHis/Aca/N/T/dK/Thr_PO3H2/Aca#D-Tyr_Et/Tyr_ab-dehydroMe/meN/E/N/dV']);
|
|
215
217
|
seqDiffCol.tags[DG.TAGS.UNITS] = NOTATION.SEPARATOR;
|
|
216
|
-
seqDiffCol.tags[
|
|
217
|
-
seqDiffCol.semType =
|
|
218
|
+
seqDiffCol.tags[bioTAGS.separator] = '/';
|
|
219
|
+
seqDiffCol.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
218
220
|
const tgtCellRenderer = 'MacromoleculeDifference';
|
|
219
221
|
seqDiffCol.setTag(DG.TAGS.CELL_RENDERER, tgtCellRenderer);
|
|
220
222
|
const df = DG.DataFrame.fromColumns([seqDiffCol]);
|
|
@@ -1,7 +1,9 @@
|
|
|
1
|
-
import
|
|
1
|
+
import * as grok from 'datagrok-api/grok';
|
|
2
|
+
import * as ui from 'datagrok-api/ui';
|
|
2
3
|
import * as DG from 'datagrok-api/dg';
|
|
4
|
+
|
|
5
|
+
import {after, before, category, test, expect, delay} from '@datagrok-libraries/utils/src/test';
|
|
3
6
|
import {readDataframe} from './utils';
|
|
4
|
-
import * as grok from 'datagrok-api/grok';
|
|
5
7
|
import {_testSequenceSpaceReturnsResult} from './sequence-space-utils';
|
|
6
8
|
|
|
7
9
|
category('sequenceSpace', async () => {
|
|
@@ -16,7 +18,7 @@ category('sequenceSpace', async () => {
|
|
|
16
18
|
await _testSequenceSpaceReturnsResult(testFastaDf, 'UMAP', 'MSA');
|
|
17
19
|
grok.shell.closeTable(testFastaDf);
|
|
18
20
|
testFastaTableView.close();
|
|
19
|
-
});
|
|
21
|
+
}, {skipReason: 'GROK-12775'});
|
|
20
22
|
|
|
21
23
|
test('sequenceSpaceWithEmptyRows', async () => {
|
|
22
24
|
testHelmWithEmptyRows = await readDataframe('tests/sample_MSA_data_empty_vals.csv');
|
|
@@ -24,5 +26,5 @@ category('sequenceSpace', async () => {
|
|
|
24
26
|
await _testSequenceSpaceReturnsResult(testHelmWithEmptyRows, 'UMAP', 'MSA');
|
|
25
27
|
grok.shell.closeTable(testHelmWithEmptyRows);
|
|
26
28
|
testHelmWithEmptyRowsTableView.close();
|
|
27
|
-
});
|
|
29
|
+
}, {skipReason: 'GROK-12775'});
|
|
28
30
|
});
|
|
@@ -31,9 +31,9 @@ category('similarity/diversity', async () => {
|
|
|
31
31
|
|
|
32
32
|
async function _testSimilaritySearchViewer() {
|
|
33
33
|
const molecules = await createTableView('tests/sample_MSA_data.csv');
|
|
34
|
-
const viewer = molecules.addViewer('
|
|
34
|
+
const viewer = molecules.addViewer('Sequence Similarity Search');
|
|
35
35
|
await delay(100);
|
|
36
|
-
const similaritySearchViewer = getSearchViewer(viewer, '
|
|
36
|
+
const similaritySearchViewer = getSearchViewer(viewer, 'Sequence Similarity Search');
|
|
37
37
|
viewList.push(similaritySearchViewer);
|
|
38
38
|
viewList.push(molecules);
|
|
39
39
|
if (!similaritySearchViewer.molCol)
|
|
@@ -59,9 +59,9 @@ async function _testSimilaritySearchViewer() {
|
|
|
59
59
|
|
|
60
60
|
async function _testDiversitySearchViewer() {
|
|
61
61
|
const molecules = await createTableView('tests/sample_MSA_data.csv');
|
|
62
|
-
const viewer = molecules.addViewer('
|
|
62
|
+
const viewer = molecules.addViewer('Sequence Diversity Search');
|
|
63
63
|
await delay(10);
|
|
64
|
-
const diversitySearchviewer = getSearchViewer(viewer, '
|
|
64
|
+
const diversitySearchviewer = getSearchViewer(viewer, 'Sequence Diversity Search');
|
|
65
65
|
viewList.push(diversitySearchviewer);
|
|
66
66
|
viewList.push(molecules);
|
|
67
67
|
if (!diversitySearchviewer.renderMolIds)
|
|
@@ -6,7 +6,7 @@ import {after, before, category, test, expect, expectArray, expectObject} from '
|
|
|
6
6
|
import * as C from '../utils/constants';
|
|
7
7
|
import {splitToMonomers, _package, getHelmMonomers} from '../package';
|
|
8
8
|
import {errorToConsole} from '@datagrok-libraries/utils/src/to-console';
|
|
9
|
-
import {splitterAsFasta, splitterAsHelm} from '@datagrok-libraries/bio';
|
|
9
|
+
import {TAGS as bioTAGS, splitterAsFasta, splitterAsHelm} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
10
10
|
|
|
11
11
|
|
|
12
12
|
category('splitters', () => {
|
|
@@ -83,7 +83,7 @@ category('splitters', () => {
|
|
|
83
83
|
const semType = await grok.functions.call('Bio:detectMacromolecule', {col: seqCol});
|
|
84
84
|
if (semType)
|
|
85
85
|
seqCol.semType = semType;
|
|
86
|
-
seqCol.setTag(
|
|
86
|
+
seqCol.setTag(bioTAGS.aligned, C.MSA);
|
|
87
87
|
|
|
88
88
|
const tv: DG.TableView = grok.shell.addTableView(df);
|
|
89
89
|
// call to calculate 'cell.renderer' tag
|
|
@@ -92,9 +92,9 @@ category('splitters', () => {
|
|
|
92
92
|
dfList.push(df);
|
|
93
93
|
tvList.push(tv);
|
|
94
94
|
|
|
95
|
-
splitToMonomers(
|
|
95
|
+
splitToMonomers();
|
|
96
96
|
expect(df.columns.names().includes('17'), true);
|
|
97
|
-
});
|
|
97
|
+
}, {skipReason: 'GROK-12766'});
|
|
98
98
|
|
|
99
99
|
test('getHelmMonomers', async () => {
|
|
100
100
|
const df: DG.DataFrame = DG.DataFrame.fromCsv(
|
|
@@ -132,4 +132,3 @@ export async function _testHelmSplitter(src: string, tgt: string[]) {
|
|
|
132
132
|
console.debug(`Bio: tests: splitters: src=${JSON.stringify(src)}, res=${JSON.stringify(res)} .`);
|
|
133
133
|
expectArray(res, tgt);
|
|
134
134
|
}
|
|
135
|
-
|
|
@@ -1,11 +1,33 @@
|
|
|
1
1
|
import {after, before, category, test, expect, delay} from '@datagrok-libraries/utils/src/test';
|
|
2
2
|
import * as DG from 'datagrok-api/dg';
|
|
3
3
|
import * as grok from 'datagrok-api/grok';
|
|
4
|
+
|
|
4
5
|
import {readDataframe} from './utils';
|
|
5
6
|
import {BioSubstructureFilter, HelmFilter, SeparatorFilter} from '../widgets/bio-substructure-filter';
|
|
7
|
+
import {getMonomerLibHelper, IMonomerLibHelper} from '@datagrok-libraries/bio/src/monomer-works/monomer-utils';
|
|
8
|
+
import {LIB_DEFAULT, LIB_STORAGE_NAME} from '../utils/monomer-lib';
|
|
6
9
|
|
|
7
10
|
|
|
8
11
|
category('substructureFilters', async () => {
|
|
12
|
+
let monomerLibHelper: IMonomerLibHelper;
|
|
13
|
+
/** Backup actual user's monomer libraries settings */
|
|
14
|
+
let userLibrariesSettings: {};
|
|
15
|
+
|
|
16
|
+
before(async () => {
|
|
17
|
+
monomerLibHelper = await getMonomerLibHelper();
|
|
18
|
+
userLibrariesSettings = await grok.dapi.userDataStorage.get(LIB_STORAGE_NAME, true);
|
|
19
|
+
|
|
20
|
+
// Test 'helm' requires default monomer library loaded
|
|
21
|
+
await grok.dapi.userDataStorage.post(LIB_STORAGE_NAME, LIB_DEFAULT, true);
|
|
22
|
+
await monomerLibHelper.loadLibraries(true); // load default libraries
|
|
23
|
+
});
|
|
24
|
+
|
|
25
|
+
after(async () => {
|
|
26
|
+
// UserDataStorage.put() replaces existing data
|
|
27
|
+
await grok.dapi.userDataStorage.put(LIB_STORAGE_NAME, userLibrariesSettings, true);
|
|
28
|
+
await monomerLibHelper.loadLibraries(true); // load user settings libraries
|
|
29
|
+
});
|
|
30
|
+
|
|
9
31
|
test('fasta', async () => {
|
|
10
32
|
const fasta = await readDataframe('tests/filter_FASTA.csv');
|
|
11
33
|
const filter = new BioSubstructureFilter();
|
|
@@ -69,5 +91,5 @@ category('substructureFilters', async () => {
|
|
|
69
91
|
expect(filter.dataFrame!.filter.trueCount, 1);
|
|
70
92
|
expect(filter.dataFrame!.filter.get(3), true);
|
|
71
93
|
helmTableView.close();
|
|
72
|
-
}, {skipReason: '
|
|
94
|
+
}, {skipReason: 'GROK-12779'});
|
|
73
95
|
});
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import * as grok from 'datagrok-api/grok';
|
|
2
2
|
import * as ui from 'datagrok-api/ui';
|
|
3
3
|
import * as DG from 'datagrok-api/dg';
|
|
4
|
-
import {ALIGNMENT, ALPHABET, NOTATION, TAGS as bioTAGS} from '@datagrok-libraries/bio';
|
|
4
|
+
import {ALIGNMENT, ALPHABET, NOTATION, TAGS as bioTAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
5
5
|
|
|
6
6
|
|
|
7
7
|
export function generateManySequences(): DG.Column[] {
|
package/src/tests/utils.ts
CHANGED
|
@@ -1,8 +1,9 @@
|
|
|
1
1
|
import * as DG from 'datagrok-api/dg';
|
|
2
2
|
import * as grok from 'datagrok-api/grok';
|
|
3
|
+
|
|
4
|
+
import {_package} from '../package-test';
|
|
3
5
|
import {expect} from '@datagrok-libraries/utils/src/test';
|
|
4
6
|
import {runKalign} from '../utils/multiple-sequence-alignment';
|
|
5
|
-
import {_package} from '../package-test';
|
|
6
7
|
|
|
7
8
|
export async function loadFileAsText(name: string): Promise<string> {
|
|
8
9
|
return await _package.files.readAsText(name);
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
import * as DG from 'datagrok-api/dg';
|
|
2
|
+
// import * as grok from 'datagrok-api/grok';
|
|
3
|
+
//import * as ui from 'datagrok-api/ui';
|
|
4
|
+
|
|
5
|
+
import {category, test, testViewer} from '@datagrok-libraries/utils/src/test';
|
|
6
|
+
import {readDataframe} from './utils';
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
/** Registers one test per function of the 'Bio' package tagged 'viewer', named by its friendlyName. */
category('viewers', () => {
  DG.Func.find({package: 'Bio', tags: ['viewer']})
    .map((func) => func.friendlyName)
    .forEach((viewerName) => {
      test(viewerName, async () => {
        // Each test reads its own copy of the sample data frame
        const df = await readDataframe('data/sample_FASTA_DNA.csv');
        await testViewer(viewerName, df, true);
      });
    });
});
|
|
@@ -9,11 +9,23 @@ import {
|
|
|
9
9
|
getPaletteByType,
|
|
10
10
|
getSplitter,
|
|
11
11
|
monomerToShort,
|
|
12
|
-
|
|
12
|
+
NOTATION,
|
|
13
13
|
SplitterFunc,
|
|
14
14
|
TAGS as bioTAGS,
|
|
15
|
-
|
|
16
|
-
} from '@datagrok-libraries/bio';
|
|
15
|
+
} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
16
|
+
import {SeqPalette} from '@datagrok-libraries/bio/src/seq-palettes';
|
|
17
|
+
import {UnknownSeqPalettes} from '@datagrok-libraries/bio/src/unknown';
|
|
18
|
+
|
|
19
|
+
const enum tempTAGS {
|
|
20
|
+
referenceSequence = 'reference-sequence',
|
|
21
|
+
currentWord = 'current-word',
|
|
22
|
+
monomerWidth = 'monomer-width',
|
|
23
|
+
bioSumMaxLengthWords = 'bio-sum-maxLengthWords',
|
|
24
|
+
bioMaxIndex = 'bio-maxIndex',
|
|
25
|
+
bioMaxLengthWords = 'bio-maxLengthWords',
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
type TempType = { [tagName: string]: any };
|
|
17
29
|
|
|
18
30
|
const undefinedColor = 'rgb(100,100,100)';
|
|
19
31
|
const monomerToShortFunction: (amino: string, maxLengthOfMonomer: number) => string = monomerToShort;
|
|
@@ -50,7 +62,8 @@ export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
|
|
|
50
62
|
get defaultWidth(): number { return 230; }
|
|
51
63
|
|
|
52
64
|
onClick(gridCell: DG.GridCell, e: MouseEvent): void {
|
|
53
|
-
gridCell.cell.column.temp
|
|
65
|
+
const colTemp: TempType = gridCell.cell.column.temp;
|
|
66
|
+
colTemp[tempTAGS.currentWord] = gridCell.cell.value;
|
|
54
67
|
gridCell.grid.invalidate();
|
|
55
68
|
}
|
|
56
69
|
|
|
@@ -58,8 +71,9 @@ export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
|
|
|
58
71
|
if (gridCell.cell.column.getTag(bioTAGS.aligned) !== ALIGNMENT.SEQ_MSA)
|
|
59
72
|
return;
|
|
60
73
|
|
|
61
|
-
const
|
|
62
|
-
const
|
|
74
|
+
const colTemp: TempType = gridCell.cell.column.temp;
|
|
75
|
+
const maxLengthWordsSum = colTemp[tempTAGS.bioSumMaxLengthWords];
|
|
76
|
+
const maxIndex = colTemp[tempTAGS.bioMaxIndex];
|
|
63
77
|
const argsX = e.offsetX - gridCell.gridColumn.left + (gridCell.gridColumn.left - gridCell.bounds.x);
|
|
64
78
|
let left = 0;
|
|
65
79
|
let right = maxIndex;
|
|
@@ -107,7 +121,7 @@ export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
|
|
|
107
121
|
) {
|
|
108
122
|
const grid = gridCell.gridRow !== -1 ? gridCell.grid : null;
|
|
109
123
|
const cell = gridCell.cell;
|
|
110
|
-
const paletteType = gridCell.cell.column.getTag(
|
|
124
|
+
const paletteType = gridCell.cell.column.getTag(bioTAGS.alphabet);
|
|
111
125
|
const minDistanceRenderer = 50;
|
|
112
126
|
w = getUpdatedWidth(grid, g, x, w);
|
|
113
127
|
g.save();
|
|
@@ -119,19 +133,28 @@ export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
|
|
|
119
133
|
|
|
120
134
|
//TODO: can this be replaced/merged with splitSequence?
|
|
121
135
|
const units = gridCell.cell.column.getTag(DG.TAGS.UNITS);
|
|
136
|
+
const aligned: string = gridCell.cell.column.getTag(bioTAGS.aligned);
|
|
122
137
|
|
|
123
138
|
const palette = getPaletteByType(paletteType);
|
|
124
139
|
|
|
125
|
-
const separator = gridCell.cell.column.getTag(
|
|
126
|
-
const splitLimit =
|
|
140
|
+
const separator = gridCell.cell.column.getTag(bioTAGS.separator) ?? '';
|
|
141
|
+
const splitLimit = w / 5;
|
|
127
142
|
const splitterFunc: SplitterFunc = getSplitter(units, separator, splitLimit);
|
|
128
|
-
const referenceSequence: string[] = splitterFunc(((gridCell.cell.column?.temp['reference-sequence'] != null) && (gridCell.cell.column?.temp['reference-sequence'] != ''))
|
|
129
|
-
? gridCell.cell.column.temp['reference-sequence'] : gridCell.cell.column.temp['current-word'] ?? '');
|
|
130
|
-
const monomerWidth = (gridCell.cell.column?.temp['monomer-width'] != null) ? gridCell.cell.column.temp['monomer-width'] : 'short';
|
|
131
|
-
let gapRenderer = 5;
|
|
132
143
|
|
|
133
|
-
|
|
144
|
+
// TODO: Store temp data to GridColumn
|
|
145
|
+
// Now the renderer requires data frame table Column underlying GridColumn
|
|
146
|
+
const colTemp: TempType = gridCell.cell.column.temp;
|
|
147
|
+
|
|
148
|
+
const tempReferenceSequence: string | null = colTemp[tempTAGS.referenceSequence];
|
|
149
|
+
const tempCurrentWord: string | null = colTemp[tempTAGS.currentWord];
|
|
150
|
+
const tempMonomerWidth: string | null = colTemp[tempTAGS.monomerWidth];
|
|
151
|
+
const referenceSequence: string[] = splitterFunc(
|
|
152
|
+
((tempReferenceSequence != null) && (tempReferenceSequence != '')) ?
|
|
153
|
+
tempReferenceSequence : tempCurrentWord ?? '');
|
|
154
|
+
const monomerWidth: string = (tempMonomerWidth != null) ? tempMonomerWidth : 'short';
|
|
134
155
|
|
|
156
|
+
let gapRenderer = 5;
|
|
157
|
+
let maxIndex = 0;
|
|
135
158
|
let maxLengthOfMonomer = 8;
|
|
136
159
|
|
|
137
160
|
if (monomerWidth === 'short') {
|
|
@@ -149,44 +172,42 @@ export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
|
|
|
149
172
|
const textSize = monomerToShortFunction(amino, maxLengthOfMonomer).length * 7 + gapRenderer;
|
|
150
173
|
if (textSize > (maxLengthWords[index] ?? 0))
|
|
151
174
|
maxLengthWords[index] = textSize;
|
|
152
|
-
if (index > maxIndex)
|
|
153
|
-
maxIndex = index;
|
|
154
|
-
}
|
|
175
|
+
if (index > maxIndex) maxIndex = index;
|
|
155
176
|
});
|
|
156
177
|
samples += 1;
|
|
157
178
|
}
|
|
158
179
|
const minLength = 3 * 7;
|
|
159
180
|
for (let i = 0; i <= maxIndex; i++) {
|
|
160
|
-
if (maxLengthWords[i] < minLength)
|
|
161
|
-
maxLengthWords[i] = minLength;
|
|
162
|
-
}
|
|
181
|
+
if (maxLengthWords[i] < minLength) maxLengthWords[i] = minLength;
|
|
163
182
|
const maxLengthWordSum: any = {};
|
|
164
183
|
maxLengthWordSum[0] = maxLengthWords[0];
|
|
165
|
-
for (let i = 1; i <= maxIndex; i++)
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
gridCell.cell.column.temp['bio-maxIndex'] = maxIndex;
|
|
170
|
-
gridCell.cell.column.temp['bio-maxLengthWords'] = maxLengthWords;
|
|
184
|
+
for (let i = 1; i <= maxIndex; i++) maxLengthWordSum[i] = maxLengthWordSum[i - 1] + maxLengthWords[i];
|
|
185
|
+
colTemp[tempTAGS.bioSumMaxLengthWords] = maxLengthWordSum;
|
|
186
|
+
colTemp[tempTAGS.bioMaxIndex] = maxIndex;
|
|
187
|
+
colTemp[tempTAGS.bioMaxLengthWords] = maxLengthWords;
|
|
171
188
|
gridCell.cell.column.setTag('.calculatedCellRender', splitLimit.toString());
|
|
172
189
|
}
|
|
173
190
|
} else {
|
|
174
|
-
maxLengthWords =
|
|
191
|
+
maxLengthWords = colTemp[tempTAGS.bioMaxLengthWords];
|
|
175
192
|
}
|
|
176
193
|
|
|
177
194
|
const subParts: string[] = splitterFunc(cell.value);
|
|
178
195
|
let x1 = x;
|
|
179
196
|
let color = undefinedColor;
|
|
180
197
|
let drawStyle = DrawStyle.classic;
|
|
181
|
-
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
if (aligned && aligned.includes('MSA') && units == NOTATION.SEPARATOR)
|
|
182
201
|
drawStyle = DrawStyle.MSA;
|
|
183
202
|
|
|
184
203
|
subParts.every((amino, index) => {
|
|
185
204
|
color = palette.get(amino);
|
|
186
205
|
g.fillStyle = undefinedColor;
|
|
187
206
|
const last = index === subParts.length - 1;
|
|
188
|
-
x1 = printLeftOrCentered(x1, y, w, h,
|
|
189
|
-
|
|
207
|
+
x1 = printLeftOrCentered(x1, y, w, h,
|
|
208
|
+
g, amino, color, 0, true, 1.0, separator, last, drawStyle,
|
|
209
|
+
maxLengthWords, index, gridCell, referenceSequence, maxLengthOfMonomer);
|
|
210
|
+
return minDistanceRenderer <= w;
|
|
190
211
|
});
|
|
191
212
|
|
|
192
213
|
g.restore();
|
|
@@ -221,7 +242,7 @@ export class MonomerCellRenderer extends DG.GridCellRenderer {
|
|
|
221
242
|
g.textBaseline = 'middle';
|
|
222
243
|
g.textAlign = 'center';
|
|
223
244
|
|
|
224
|
-
const palette = getPaletteByType(gridCell.cell.column.getTag(
|
|
245
|
+
const palette = getPaletteByType(gridCell.cell.column.getTag(bioTAGS.alphabet));
|
|
225
246
|
const s: string = gridCell.cell.value;
|
|
226
247
|
if (!s)
|
|
227
248
|
return;
|
|
@@ -259,7 +280,7 @@ export class MacromoleculeDifferenceCellRenderer extends DG.GridCellRenderer {
|
|
|
259
280
|
const grid = gridCell.grid;
|
|
260
281
|
const cell = gridCell.cell;
|
|
261
282
|
const s: string = cell.value ?? '';
|
|
262
|
-
const separator = gridCell.tableColumn!.tags[
|
|
283
|
+
const separator = gridCell.tableColumn!.tags[bioTAGS.separator];
|
|
263
284
|
const units: string = gridCell.tableColumn!.tags[DG.TAGS.UNITS];
|
|
264
285
|
w = getUpdatedWidth(grid, g, x, w);
|
|
265
286
|
//TODO: can this be replaced/merged with splitSequence?
|
|
@@ -284,9 +305,9 @@ export function drawMoleculeDifferenceOnCanvas(
|
|
|
284
305
|
molDifferences?: { [key: number]: HTMLCanvasElement }
|
|
285
306
|
): void {
|
|
286
307
|
if (subParts1.length !== subParts2.length) {
|
|
287
|
-
const
|
|
288
|
-
subParts1
|
|
289
|
-
|
|
308
|
+
const sequences: IComparedSequences = fillShorterSequence(subParts1, subParts2);
|
|
309
|
+
subParts1 = sequences.subParts1;
|
|
310
|
+
subParts2 = sequences.subParts2;
|
|
290
311
|
}
|
|
291
312
|
const textSize1 = g.measureText(processSequence(subParts1).join(''));
|
|
292
313
|
const textSize2 = g.measureText(processSequence(subParts2).join(''));
|
|
@@ -329,6 +350,11 @@ export function drawMoleculeDifferenceOnCanvas(
|
|
|
329
350
|
g.restore();
|
|
330
351
|
}
|
|
331
352
|
|
|
353
|
+
interface IComparedSequences{
|
|
354
|
+
subParts1: string[];
|
|
355
|
+
subParts2: string[];
|
|
356
|
+
}
|
|
357
|
+
|
|
332
358
|
function createDifferenceCanvas(
|
|
333
359
|
amino1: string,
|
|
334
360
|
amino2: string,
|
|
@@ -351,3 +377,30 @@ function createDifferenceCanvas(
|
|
|
351
377
|
printLeftOrCentered(0, y + shift, width, h, context, amino2, color2, 0, true);
|
|
352
378
|
return canvas;
|
|
353
379
|
}
|
|
380
|
+
|
|
381
|
+
/**
 * Pads the shorter of two monomer arrays with empty strings so that both have equal length.
 *
 * The shorter array is compared to the longer one position by position twice: aligned to the
 * start and aligned to the end of the longer array. The empty monomers are appended when the
 * start alignment yields strictly more identical positions, otherwise they are prepended.
 *
 * @param subParts1 Monomers of the first sequence
 * @param subParts2 Monomers of the second sequence
 * @returns Both sequences, the shorter one replaced with its padded copy; inputs are not mutated
 */
function fillShorterSequence(subParts1: string[], subParts2: string[]): IComparedSequences {
  const firstIsLonger = subParts1.length > subParts2.length;
  const longerSeq = firstIsLonger ? subParts1 : subParts2;
  const shorterSeq = firstIsLonger ? subParts2 : subParts1;
  const lengthDiff = longerSeq.length - shorterSeq.length;

  let numIdenticalStart = 0;
  let numIdenticalEnd = 0;
  // Every monomer of the shorter sequence takes part in both comparisons, including
  // shorterSeq[0] against longerSeq[lengthDiff] for the end alignment.
  for (let i = 0; i < shorterSeq.length; i++) {
    if (longerSeq[i] === shorterSeq[i]) numIdenticalStart++;
    if (longerSeq[i + lengthDiff] === shorterSeq[i]) numIdenticalEnd++;
  }

  const emptyMonomersArray = new Array<string>(lengthDiff).fill('');
  // On a tie the shorter sequence is aligned to the end
  const paddedSeq = numIdenticalStart > numIdenticalEnd ?
    shorterSeq.concat(emptyMonomersArray) : emptyMonomersArray.concat(shorterSeq);

  return firstIsLonger ?
    {subParts1: subParts1, subParts2: paddedSeq} :
    {subParts1: paddedSeq, subParts2: subParts2};
}
|
package/src/utils/constants.ts
CHANGED
|
@@ -12,18 +12,14 @@ export enum COLUMNS_NAMES {
|
|
|
12
12
|
export enum TAGS {
|
|
13
13
|
AAR = 'AAR',
|
|
14
14
|
POSITION = 'Pos',
|
|
15
|
-
SEPARATOR = 'separator',
|
|
16
15
|
SELECTION = 'selection',
|
|
17
|
-
ALPHABET = 'alphabet',
|
|
18
|
-
ALIGNED = 'aligned',
|
|
19
16
|
}
|
|
20
17
|
|
|
21
18
|
export enum SEM_TYPES {
|
|
22
19
|
MONOMER = 'Monomer',
|
|
23
20
|
MACROMOLECULE_DIFFERENCE = 'MacromoleculeDifference',
|
|
24
21
|
ACTIVITY = 'activity',
|
|
25
|
-
ACTIVITY_SCALED = 'activityScaled'
|
|
26
|
-
MACROMOLECULE = 'Macromolecule',
|
|
22
|
+
ACTIVITY_SCALED = 'activityScaled'
|
|
27
23
|
}
|
|
28
24
|
|
|
29
25
|
export const MSA = 'MSA';
|
|
@@ -47,7 +43,7 @@ export const aarGroups = {
|
|
|
47
43
|
'-': '-',
|
|
48
44
|
};
|
|
49
45
|
|
|
50
|
-
export const groupDescription: {[key: string]: {'description': string, aminoAcids: string[]}} = {
|
|
46
|
+
export const groupDescription: { [key: string]: { 'description': string, aminoAcids: string[] } } = {
|
|
51
47
|
'PC': {'description': 'Positive Amino Acids, with Electrically Charged Side Chains', 'aminoAcids': ['R', 'H', 'K']},
|
|
52
48
|
'NC': {'description': 'Negative Amino Acids, with Electrically Charged Side Chains', 'aminoAcids': ['D', 'E']},
|
|
53
49
|
'U': {'description': 'Amino Acids with Polar Uncharged Side Chains', 'aminoAcids': ['S', 'T', 'N', 'Q']},
|
|
@@ -58,3 +54,8 @@ export const groupDescription: {[key: string]: {'description': string, aminoAcid
|
|
|
58
54
|
},
|
|
59
55
|
'-': {'description': 'Unknown Amino Acid', 'aminoAcids': ['-']},
|
|
60
56
|
};
|
|
57
|
+
|
|
58
|
+
export namespace PEPSEA {
|
|
59
|
+
export const SEPARATOR = '.';
|
|
60
|
+
}
|
|
61
|
+
|
package/src/utils/convert.ts
CHANGED
|
@@ -4,7 +4,8 @@ import * as grok from 'datagrok-api/grok';
|
|
|
4
4
|
|
|
5
5
|
import $ from 'cash-dom';
|
|
6
6
|
import {Subscription} from 'rxjs';
|
|
7
|
-
import {NOTATION
|
|
7
|
+
import {NOTATION} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
8
|
+
import {NotationConverter} from '@datagrok-libraries/bio/src/utils/notation-converter';
|
|
8
9
|
|
|
9
10
|
|
|
10
11
|
let convertDialog: DG.Dialog | null = null;
|
|
@@ -46,7 +47,7 @@ export function convert(col: DG.Column): void {
|
|
|
46
47
|
});
|
|
47
48
|
|
|
48
49
|
if (convertDialog == null) {
|
|
49
|
-
convertDialog = ui.dialog('Convert
|
|
50
|
+
convertDialog = ui.dialog('Convert Sequence Notation')
|
|
50
51
|
.add(ui.div([
|
|
51
52
|
ui.divText(
|
|
52
53
|
'Current notation: ' + currentNotation,
|
|
@@ -86,6 +87,11 @@ export async function convertDo(
|
|
|
86
87
|
const newColumn = converter.convert(targetNotation, separator);
|
|
87
88
|
srcCol.dataFrame.columns.add(newColumn);
|
|
88
89
|
|
|
90
|
+
// Call detector directly to escape some error on detectSemanticTypes
|
|
91
|
+
const semType = await grok.functions.call('Bio:detectMacromolecule', {col: newColumn});
|
|
92
|
+
if (semType)
|
|
93
|
+
newColumn.semType = semType;
|
|
94
|
+
|
|
89
95
|
// call to calculate 'cell.renderer' tag
|
|
90
96
|
await grok.data.detectSemanticTypes(srcCol.dataFrame);
|
|
91
97
|
|