@datagrok/bio 2.0.18 → 2.0.20
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/package-test.js +664 -440
- package/dist/package.js +382 -156
- package/package.json +2 -2
- package/src/package.ts +14 -2
- package/src/substructure-search/substructure-search.ts +53 -23
- package/src/tests/Palettes-test.ts +9 -10
- package/src/tests/WebLogo-positions-test.ts +6 -8
- package/src/tests/checkInputColumn-tests.ts +10 -9
- package/src/tests/convert-test.ts +30 -31
- package/src/tests/detectors-test.ts +6 -5
- package/src/tests/renderers-test.ts +13 -12
- package/src/utils/cell-renderer.ts +8 -23
- package/src/utils/multiple-sequence-alignment.ts +5 -4
- package/src/utils/utils.ts +1 -1
- package/src/viewers/vd-regions-viewer.ts +2 -4
- package/src/widgets/bio-substructure-filter.ts +153 -0
package/package.json
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
"name": "Leonid Stolbov",
|
|
6
6
|
"email": "lstolbov@datagrok.ai"
|
|
7
7
|
},
|
|
8
|
-
"version": "2.0.18",
|
|
8
|
+
"version": "2.0.20",
|
|
9
9
|
"description": "Bio is a [package](https://datagrok.ai/help/develop/develop#packages) for the [Datagrok](https://datagrok.ai) platform",
|
|
10
10
|
"repository": {
|
|
11
11
|
"type": "git",
|
|
@@ -14,7 +14,7 @@
|
|
|
14
14
|
},
|
|
15
15
|
"dependencies": {
|
|
16
16
|
"@biowasm/aioli": "^3.1.0",
|
|
17
|
-
"@datagrok-libraries/bio": "^5.
|
|
17
|
+
"@datagrok-libraries/bio": "^5.1.0",
|
|
18
18
|
"@datagrok-libraries/chem-meta": "1.0.1",
|
|
19
19
|
"@datagrok-libraries/ml": "^6.2.0",
|
|
20
20
|
"@datagrok-libraries/utils": "^1.10.1",
|
package/src/package.ts
CHANGED
|
@@ -31,6 +31,7 @@ import {SequenceSimilarityViewer} from './analysis/sequence-similarity-viewer';
|
|
|
31
31
|
import {SequenceDiversityViewer} from './analysis/sequence-diversity-viewer';
|
|
32
32
|
import {substructureSearchDialog} from './substructure-search/substructure-search';
|
|
33
33
|
import {saveAsFastaUI} from './utils/save-as-fasta';
|
|
34
|
+
import {BioSubstructureFilter} from './widgets/bio-substructure-filter';
|
|
34
35
|
|
|
35
36
|
//tags: init
|
|
36
37
|
export async function initBio() {
|
|
@@ -449,7 +450,7 @@ export async function testDetectMacromolecule(path: string): Promise<DG.DataFram
|
|
|
449
450
|
//tags: panel, bio
|
|
450
451
|
//input: column col {semType: Macromolecule}
|
|
451
452
|
export function splitToMonomers(col: DG.Column<string>): void {
|
|
452
|
-
if (!col.getTag(bio.
|
|
453
|
+
if (!col.getTag(bio.TAGS.aligned).includes(C.MSA))
|
|
453
454
|
return grok.shell.error('Splitting is applicable only for aligned sequences');
|
|
454
455
|
|
|
455
456
|
const tempDf = splitAlignedSequences(col);
|
|
@@ -517,4 +518,15 @@ export function bioSubstructureSearch(col: DG.Column): void {
|
|
|
517
518
|
//tags: fileExporter
|
|
518
519
|
export function saveAsFasta() {
|
|
519
520
|
saveAsFastaUI();
|
|
520
|
-
}
|
|
521
|
+
}
|
|
522
|
+
|
|
523
|
+
//name: BioSubstructureFilter
|
|
524
|
+
//description: Substructure filter for linear macromolecules
|
|
525
|
+
//tags: filter
|
|
526
|
+
//output: filter result
|
|
527
|
+
//meta.semType: Macromolecule
|
|
528
|
+
export function bioSubstructureFilter(): BioSubstructureFilter {
|
|
529
|
+
return new BioSubstructureFilter();
|
|
530
|
+
}
|
|
531
|
+
|
|
532
|
+
|
|
@@ -1,8 +1,12 @@
|
|
|
1
1
|
import * as grok from 'datagrok-api/grok';
|
|
2
2
|
import * as ui from 'datagrok-api/ui';
|
|
3
3
|
import * as DG from 'datagrok-api/dg';
|
|
4
|
-
import
|
|
4
|
+
import * as bio from '@datagrok-libraries/bio';
|
|
5
|
+
|
|
5
6
|
import * as C from '../utils/constants';
|
|
7
|
+
import {getMonomericMols} from '../calculations/monomerLevelMols';
|
|
8
|
+
import {BitSet} from 'datagrok-api/dg';
|
|
9
|
+
import {updateDivInnerHTML} from '../utils/ui-utils';
|
|
6
10
|
|
|
7
11
|
/**
|
|
8
12
|
* Searches substructure in each row of Macromolecule column
|
|
@@ -12,48 +16,58 @@ import * as C from '../utils/constants';
|
|
|
12
16
|
export function substructureSearchDialog(col: DG.Column): void {
|
|
13
17
|
const units = col.getTag(DG.TAGS.UNITS);
|
|
14
18
|
const separator = col.getTag(C.TAGS.SEPARATOR);
|
|
15
|
-
const notations = [NOTATION.FASTA, NOTATION.SEPARATOR];
|
|
19
|
+
// const notations = [NOTATION.FASTA, NOTATION.SEPARATOR, NOTATION.HELM];
|
|
16
20
|
|
|
17
21
|
const substructureInput = ui.textInput('Substructure', '');
|
|
18
|
-
|
|
22
|
+
|
|
23
|
+
const editHelmLink = ui.link('Edit helm', async () => {
|
|
24
|
+
updateDivInnerHTML(inputsDiv, grid.root);
|
|
25
|
+
await ui.tools.waitForElementInDom(grid.root);
|
|
26
|
+
setTimeout(() => {
|
|
27
|
+
grid.cell('substr_helm', 0).element.children[0].dispatchEvent(new KeyboardEvent('keydown', {key: 'Enter'}));
|
|
28
|
+
}, 100);
|
|
29
|
+
});
|
|
30
|
+
|
|
31
|
+
const df = DG.DataFrame.create(1);
|
|
32
|
+
df.columns.addNewString('substr_helm').init((i) => '');
|
|
33
|
+
df.col('substr_helm')!.semType = col.semType;
|
|
34
|
+
df.col('substr_helm')!.setTag(DG.TAGS.UNITS, bio.NOTATION.HELM);
|
|
35
|
+
const grid = df.plot.grid();
|
|
19
36
|
const separatorInput = ui.textInput('Separator', separator);
|
|
20
37
|
|
|
21
|
-
|
|
22
|
-
const toggleSeparator = () => {
|
|
23
|
-
if (notationInput.value !== NOTATION.SEPARATOR)
|
|
24
|
-
separatorInput.root.hidden = true;
|
|
25
|
-
else
|
|
26
|
-
separatorInput.root.hidden = false;
|
|
27
|
-
};
|
|
38
|
+
const inputsDiv = ui.div();
|
|
28
39
|
|
|
29
|
-
|
|
40
|
+
const inputs = units === bio.NOTATION.HELM ? ui.divV([editHelmLink]) :
|
|
41
|
+
units === bio.NOTATION.SEPARATOR ? ui.inputs([substructureInput, separatorInput]) :
|
|
42
|
+
ui.inputs([substructureInput]);
|
|
30
43
|
|
|
31
|
-
|
|
32
|
-
toggleSeparator();
|
|
33
|
-
});
|
|
44
|
+
updateDivInnerHTML(inputsDiv, inputs);
|
|
34
45
|
|
|
35
46
|
ui.dialog('Substructure search')
|
|
36
|
-
.add(ui.
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
separatorInput
|
|
47
|
+
.add(ui.divV([
|
|
48
|
+
ui.divText(`Notation: ${units}`),
|
|
49
|
+
inputsDiv
|
|
40
50
|
]))
|
|
41
|
-
.onOK(() => {
|
|
42
|
-
let substructure = substructureInput.value;
|
|
43
|
-
if (
|
|
51
|
+
.onOK(async () => {
|
|
52
|
+
let substructure = units === bio.NOTATION.HELM ? df.get('substr_helm', 0) : substructureInput.value;
|
|
53
|
+
if (units === bio.NOTATION.SEPARATOR && separatorInput.value !== separator && separatorInput.value !== '')
|
|
44
54
|
substructure = substructure.replaceAll(separatorInput.value, separator);
|
|
45
55
|
const matchesColName = `Matches: ${substructure}`;
|
|
46
56
|
const colExists = col.dataFrame.columns.names()
|
|
47
57
|
.filter((it) => it.toLocaleLowerCase() === matchesColName.toLocaleLowerCase()).length > 0;
|
|
48
58
|
if (!colExists) {
|
|
49
|
-
|
|
59
|
+
let matches: BitSet;
|
|
60
|
+
if (units === bio.NOTATION.HELM)
|
|
61
|
+
matches = await helmSubstructureSearch(substructure, col);
|
|
62
|
+
else
|
|
63
|
+
matches = linearSubstructureSearch(substructure, col);
|
|
50
64
|
col.dataFrame.columns.add(DG.Column.fromBitSet(matchesColName, matches));
|
|
51
65
|
} else { grok.shell.warning(`Search ${substructure} is already performed`); }
|
|
52
66
|
})
|
|
53
67
|
.show();
|
|
54
68
|
}
|
|
55
69
|
|
|
56
|
-
export function substructureSearch(substructure: string, col: DG.Column): DG.BitSet {
|
|
70
|
+
export function linearSubstructureSearch(substructure: string, col: DG.Column): DG.BitSet {
|
|
57
71
|
const lowerCaseSubstr = substructure.toLowerCase();
|
|
58
72
|
const resultArray = DG.BitSet.create(col.length);
|
|
59
73
|
for (let i = 0; i < col.length; i++) {
|
|
@@ -63,3 +77,19 @@ export function substructureSearch(substructure: string, col: DG.Column): DG.Bit
|
|
|
63
77
|
}
|
|
64
78
|
return resultArray;
|
|
65
79
|
}
|
|
80
|
+
|
|
81
|
+
async function helmSubstructureSearch(substructure: string, col: DG.Column): Promise<BitSet> {
|
|
82
|
+
const helmColWithSubstructure = DG.Column.string('helm', col.length + 1)
|
|
83
|
+
.init((i) => i === col.length ? substructure : col.get(i));
|
|
84
|
+
helmColWithSubstructure.setTag(DG.TAGS.UNITS, bio.NOTATION.HELM);
|
|
85
|
+
const monomericMolsCol = await getMonomericMols(helmColWithSubstructure, true);
|
|
86
|
+
const molSubstructure = monomericMolsCol.get(col.length);
|
|
87
|
+
const monomericMolsDf = DG.DataFrame.fromColumns([monomericMolsCol]);
|
|
88
|
+
monomericMolsDf.rows.removeAt(col.length);
|
|
89
|
+
const matchesCol = await grok.functions.call('Chem:searchSubstructure', {
|
|
90
|
+
molStringsColumn: monomericMolsDf.columns.byIndex(0),
|
|
91
|
+
molString: molSubstructure,
|
|
92
|
+
molBlockFailover: '',
|
|
93
|
+
});
|
|
94
|
+
return matchesCol.get(0);
|
|
95
|
+
}
|
|
@@ -1,27 +1,26 @@
|
|
|
1
1
|
import * as grok from 'datagrok-api/grok';
|
|
2
2
|
import * as ui from 'datagrok-api/ui';
|
|
3
3
|
import * as DG from 'datagrok-api/dg';
|
|
4
|
+
import * as bio from '@datagrok-libraries/bio';
|
|
4
5
|
|
|
5
6
|
import {after, before, category, test, expect, expectObject} from '@datagrok-libraries/utils/src/test';
|
|
6
7
|
|
|
7
8
|
import {_testPaletteN, _testPaletteAA} from '@datagrok-libraries/bio/src/tests/palettes-tests';
|
|
8
|
-
import {ALPHABET, UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
9
|
-
import {AminoacidsPalettes} from '@datagrok-libraries/bio/src/aminoacids';
|
|
10
9
|
|
|
11
10
|
category('Palettes', () => {
|
|
12
11
|
test('testPaletteN', async () => { await _testPaletteN(); });
|
|
13
12
|
test('testPaletteAA', async () => { await _testPaletteAA(); });
|
|
14
13
|
|
|
15
14
|
test('testPalettePtMe', async () => {
|
|
16
|
-
const colorMeNle = AminoacidsPalettes.GrokGroups.get('MeNle');
|
|
17
|
-
const colorMeA = AminoacidsPalettes.GrokGroups.get('MeA');
|
|
18
|
-
const colorMeG = AminoacidsPalettes.GrokGroups.get('MeG');
|
|
19
|
-
const colorMeF = AminoacidsPalettes.GrokGroups.get('MeF');
|
|
15
|
+
const colorMeNle = bio.AminoacidsPalettes.GrokGroups.get('MeNle');
|
|
16
|
+
const colorMeA = bio.AminoacidsPalettes.GrokGroups.get('MeA');
|
|
17
|
+
const colorMeG = bio.AminoacidsPalettes.GrokGroups.get('MeG');
|
|
18
|
+
const colorMeF = bio.AminoacidsPalettes.GrokGroups.get('MeF');
|
|
20
19
|
|
|
21
|
-
const colorL = AminoacidsPalettes.GrokGroups.get('L');
|
|
22
|
-
const colorA = AminoacidsPalettes.GrokGroups.get('A');
|
|
23
|
-
const colorG = AminoacidsPalettes.GrokGroups.get('G');
|
|
24
|
-
const colorF = AminoacidsPalettes.GrokGroups.get('F');
|
|
20
|
+
const colorL = bio.AminoacidsPalettes.GrokGroups.get('L');
|
|
21
|
+
const colorA = bio.AminoacidsPalettes.GrokGroups.get('A');
|
|
22
|
+
const colorG = bio.AminoacidsPalettes.GrokGroups.get('G');
|
|
23
|
+
const colorF = bio.AminoacidsPalettes.GrokGroups.get('F');
|
|
25
24
|
|
|
26
25
|
expect(colorMeNle, colorL);
|
|
27
26
|
expect(colorMeA, colorA);
|
|
@@ -4,8 +4,6 @@ import * as DG from 'datagrok-api/dg';
|
|
|
4
4
|
import * as bio from '@datagrok-libraries/bio';
|
|
5
5
|
|
|
6
6
|
import {after, before, category, test, expect, expectObject, delay} from '@datagrok-libraries/utils/src/test';
|
|
7
|
-
import {Column} from 'datagrok-api/dg';
|
|
8
|
-
import {ALPHABET, NOTATION, UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
9
7
|
|
|
10
8
|
category('WebLogo-positions', () => {
|
|
11
9
|
let tvList: DG.TableView[];
|
|
@@ -38,8 +36,8 @@ ATC-G-TTGC--
|
|
|
38
36
|
|
|
39
37
|
const seqCol: DG.Column = df.getCol('seq');
|
|
40
38
|
seqCol.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
41
|
-
seqCol.setTag(DG.TAGS.UNITS, NOTATION.FASTA);
|
|
42
|
-
seqCol.setTag(
|
|
39
|
+
seqCol.setTag(DG.TAGS.UNITS, bio.NOTATION.FASTA);
|
|
40
|
+
seqCol.setTag(bio.TAGS.alphabet, bio.ALPHABET.DNA);
|
|
43
41
|
|
|
44
42
|
const wlViewer: bio.WebLogo = (await df.plot.fromType('WebLogo')) as bio.WebLogo;
|
|
45
43
|
tv.dockManager.dock(wlViewer.root, DG.DOCK_TYPE.DOWN);
|
|
@@ -87,8 +85,8 @@ ATC-G-TTGC--
|
|
|
87
85
|
|
|
88
86
|
const seqCol: DG.Column = df.getCol('seq');
|
|
89
87
|
seqCol.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
90
|
-
seqCol.setTag(DG.TAGS.UNITS, NOTATION.FASTA);
|
|
91
|
-
seqCol.setTag(
|
|
88
|
+
seqCol.setTag(DG.TAGS.UNITS, bio.NOTATION.FASTA);
|
|
89
|
+
seqCol.setTag(bio.TAGS.alphabet, bio.ALPHABET.DNA);
|
|
92
90
|
|
|
93
91
|
df.filter.init((i) => {
|
|
94
92
|
return i > 2;
|
|
@@ -131,8 +129,8 @@ ATC-G-TTGC--
|
|
|
131
129
|
|
|
132
130
|
const seqCol: DG.Column = df.getCol('seq');
|
|
133
131
|
seqCol.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
134
|
-
seqCol.setTag(DG.TAGS.UNITS, NOTATION.FASTA);
|
|
135
|
-
seqCol.setTag(
|
|
132
|
+
seqCol.setTag(DG.TAGS.UNITS, bio.NOTATION.FASTA);
|
|
133
|
+
seqCol.setTag(bio.TAGS.alphabet, bio.ALPHABET.DNA);
|
|
136
134
|
|
|
137
135
|
const wlViewer: bio.WebLogo = (await df.plot.fromType('WebLogo',
|
|
138
136
|
{'skipEmptyPositions': true})) as bio.WebLogo;
|
|
@@ -1,12 +1,13 @@
|
|
|
1
|
-
import {after, before, category, test, expect, expectArray} from '@datagrok-libraries/utils/src/test';
|
|
2
|
-
|
|
3
1
|
import * as grok from 'datagrok-api/grok';
|
|
4
2
|
import * as ui from 'datagrok-api/ui';
|
|
5
3
|
import * as DG from 'datagrok-api/dg';
|
|
4
|
+
import * as bio from '@datagrok-libraries/bio';
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
import {after, before, category, test, expect, expectArray} from '@datagrok-libraries/utils/src/test';
|
|
6
8
|
|
|
7
9
|
import {checkInputColumn, multipleSequenceAlignmentAny} from '../package';
|
|
8
10
|
import {UNITS} from 'datagrok-api/dg';
|
|
9
|
-
import {ALPHABET, UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
10
11
|
|
|
11
12
|
|
|
12
13
|
category('checkInputColumn', () => {
|
|
@@ -27,7 +28,7 @@ seq4`;
|
|
|
27
28
|
const col: DG.Column = df.getCol('seq');
|
|
28
29
|
col.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
29
30
|
col.setTag(DG.TAGS.UNITS, 'fasta');
|
|
30
|
-
col.setTag(
|
|
31
|
+
col.setTag(bio.TAGS.alphabet, bio.ALPHABET.DNA);
|
|
31
32
|
|
|
32
33
|
const [res, msg]: [boolean, string] = checkInputColumn(
|
|
33
34
|
col, 'Test', ['fasta',], ['DNA', 'RNA', 'PT']);
|
|
@@ -40,8 +41,8 @@ seq4`;
|
|
|
40
41
|
const col: DG.Column = df.getCol('seq');
|
|
41
42
|
col.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
42
43
|
col.setTag(DG.TAGS.UNITS, 'helm');
|
|
43
|
-
col.setTag(
|
|
44
|
-
col.setTag(
|
|
44
|
+
col.setTag(bio.TAGS.alphabetSize, '11');
|
|
45
|
+
col.setTag(bio.TAGS.alphabetIsMultichar, 'true');
|
|
45
46
|
|
|
46
47
|
const [res, msg]: [boolean, string] = checkInputColumn(
|
|
47
48
|
col, 'Test', ['fasta',], ['DNA', 'RNA', 'PT']);
|
|
@@ -54,9 +55,9 @@ seq4`;
|
|
|
54
55
|
const col: DG.Column = df.getCol('seq');
|
|
55
56
|
col.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
56
57
|
col.setTag(DG.TAGS.UNITS, 'fasta');
|
|
57
|
-
col.setTag(
|
|
58
|
-
col.setTag(
|
|
59
|
-
col.setTag(
|
|
58
|
+
col.setTag(bio.TAGS.alphabet, 'UN');
|
|
59
|
+
col.setTag(bio.TAGS.alphabetSize, '11');
|
|
60
|
+
col.setTag(bio.TAGS.alphabetIsMultichar, 'true');
|
|
60
61
|
|
|
61
62
|
const [res, msg]: [boolean, string] = checkInputColumn(
|
|
62
63
|
col, 'Test', ['fasta',], ['DNA', 'RNA', 'PT']);
|
|
@@ -1,12 +1,11 @@
|
|
|
1
1
|
import * as DG from 'datagrok-api/dg';
|
|
2
2
|
import * as ui from 'datagrok-api/ui';
|
|
3
3
|
import * as grok from 'datagrok-api/grok';
|
|
4
|
+
import * as bio from '@datagrok-libraries/bio';
|
|
4
5
|
|
|
5
6
|
import {category, expect, expectArray, test} from '@datagrok-libraries/utils/src/test';
|
|
6
7
|
|
|
7
8
|
import {ConverterFunc} from './types';
|
|
8
|
-
import {NotationConverter} from '@datagrok-libraries/bio/src/utils/notation-converter';
|
|
9
|
-
import {NOTATION, UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
10
9
|
|
|
11
10
|
// import {mmSemType} from '../const';
|
|
12
11
|
// import {importFasta} from '../package';
|
|
@@ -133,12 +132,12 @@ RNA1{P.R(U)P.R(U)P.R(C)P.R(A)P.R(A)P.R(C)P.P.P}$$$
|
|
|
133
132
|
return _csvDfs[key];
|
|
134
133
|
};
|
|
135
134
|
|
|
136
|
-
function converter(tgtNotation: NOTATION, tgtSeparator: string | null = null): ConverterFunc {
|
|
137
|
-
if (tgtNotation === NOTATION.SEPARATOR && !tgtSeparator)
|
|
135
|
+
function converter(tgtNotation: bio.NOTATION, tgtSeparator: string | null = null): ConverterFunc {
|
|
136
|
+
if (tgtNotation === bio.NOTATION.SEPARATOR && !tgtSeparator)
|
|
138
137
|
throw new Error(`Argument 'separator' is missed for notation '${tgtNotation.toString()}'.`);
|
|
139
138
|
|
|
140
139
|
return function(srcCol: DG.Column): DG.Column {
|
|
141
|
-
const converter = new NotationConverter(srcCol);
|
|
140
|
+
const converter = new bio.NotationConverter(srcCol);
|
|
142
141
|
const resCol = converter.convert(tgtNotation, tgtSeparator);
|
|
143
142
|
expect(resCol.getTag('units'), tgtNotation);
|
|
144
143
|
return resCol;
|
|
@@ -157,100 +156,100 @@ RNA1{P.R(U)P.R(U)P.R(C)P.R(A)P.R(A)P.R(C)P.P.P}$$$
|
|
|
157
156
|
const tgtCol: DG.Column = tgtDf.getCol('seq');
|
|
158
157
|
|
|
159
158
|
expectArray(resCol.toList(), tgtCol.toList());
|
|
160
|
-
const uh: UnitsHandler = new UnitsHandler(resCol);
|
|
159
|
+
const uh: bio.UnitsHandler = new bio.UnitsHandler(resCol);
|
|
161
160
|
}
|
|
162
161
|
|
|
163
162
|
// FASTA tests
|
|
164
163
|
// fasta -> separator
|
|
165
164
|
test('testFastaPtToSeparator', async () => {
|
|
166
|
-
await _testConvert(Samples.fastaPt, converter(NOTATION.SEPARATOR, '-'), Samples.separatorPt);
|
|
165
|
+
await _testConvert(Samples.fastaPt, converter(bio.NOTATION.SEPARATOR, '-'), Samples.separatorPt);
|
|
167
166
|
});
|
|
168
167
|
test('testFastaDnaToSeparator', async () => {
|
|
169
|
-
await _testConvert(Samples.fastaDna, converter(NOTATION.SEPARATOR, '/'), Samples.separatorDna);
|
|
168
|
+
await _testConvert(Samples.fastaDna, converter(bio.NOTATION.SEPARATOR, '/'), Samples.separatorDna);
|
|
170
169
|
});
|
|
171
170
|
test('testFastaRnaToSeparator', async () => {
|
|
172
|
-
await _testConvert(Samples.fastaRna, converter(NOTATION.SEPARATOR, '*'), Samples.separatorRna);
|
|
171
|
+
await _testConvert(Samples.fastaRna, converter(bio.NOTATION.SEPARATOR, '*'), Samples.separatorRna);
|
|
173
172
|
});
|
|
174
173
|
test('testFastaGapsToSeparator', async () => {
|
|
175
|
-
await _testConvert(Samples.fastaGaps, converter(NOTATION.SEPARATOR, '/'), Samples.separatorGaps);
|
|
174
|
+
await _testConvert(Samples.fastaGaps, converter(bio.NOTATION.SEPARATOR, '/'), Samples.separatorGaps);
|
|
176
175
|
});
|
|
177
176
|
|
|
178
177
|
// fasta -> helm
|
|
179
178
|
test('testFastaPtToHelm', async () => {
|
|
180
|
-
await _testConvert(Samples.fastaPt, converter(NOTATION.HELM), Samples.helmPt);
|
|
179
|
+
await _testConvert(Samples.fastaPt, converter(bio.NOTATION.HELM), Samples.helmPt);
|
|
181
180
|
});
|
|
182
181
|
test('testFastaDnaToHelm', async () => {
|
|
183
|
-
await _testConvert(Samples.fastaDna, converter(NOTATION.HELM), Samples.helmDna);
|
|
182
|
+
await _testConvert(Samples.fastaDna, converter(bio.NOTATION.HELM), Samples.helmDna);
|
|
184
183
|
});
|
|
185
184
|
test('testFastaRnaToHelm', async () => {
|
|
186
|
-
await _testConvert(Samples.fastaRna, converter(NOTATION.HELM), Samples.helmRna);
|
|
185
|
+
await _testConvert(Samples.fastaRna, converter(bio.NOTATION.HELM), Samples.helmRna);
|
|
187
186
|
});
|
|
188
187
|
test('testFastaGapsToHelm', async () => {
|
|
189
|
-
await _testConvert(Samples.fastaGaps, converter(NOTATION.HELM), Samples.helmGaps);
|
|
188
|
+
await _testConvert(Samples.fastaGaps, converter(bio.NOTATION.HELM), Samples.helmGaps);
|
|
190
189
|
});
|
|
191
190
|
|
|
192
191
|
|
|
193
192
|
// SEPARATOR tests
|
|
194
193
|
// separator -> fasta
|
|
195
194
|
test('testSeparatorPtToFasta', async () => {
|
|
196
|
-
await _testConvert(Samples.separatorPt, converter(NOTATION.FASTA), Samples.fastaPt);
|
|
195
|
+
await _testConvert(Samples.separatorPt, converter(bio.NOTATION.FASTA), Samples.fastaPt);
|
|
197
196
|
});
|
|
198
197
|
test('testSeparatorDnaToFasta', async () => {
|
|
199
|
-
await _testConvert(Samples.separatorDna, converter(NOTATION.FASTA), Samples.fastaDna);
|
|
198
|
+
await _testConvert(Samples.separatorDna, converter(bio.NOTATION.FASTA), Samples.fastaDna);
|
|
200
199
|
});
|
|
201
200
|
test('testSeparatorRnaToFasta', async () => {
|
|
202
|
-
await _testConvert(Samples.separatorRna, converter(NOTATION.FASTA), Samples.fastaRna);
|
|
201
|
+
await _testConvert(Samples.separatorRna, converter(bio.NOTATION.FASTA), Samples.fastaRna);
|
|
203
202
|
});
|
|
204
203
|
test('testSeparatorGapsToFasta', async () => {
|
|
205
|
-
await _testConvert(Samples.separatorGaps, converter(NOTATION.FASTA), Samples.fastaGaps);
|
|
204
|
+
await _testConvert(Samples.separatorGaps, converter(bio.NOTATION.FASTA), Samples.fastaGaps);
|
|
206
205
|
});
|
|
207
206
|
|
|
208
207
|
// separator -> helm
|
|
209
208
|
test('testSeparatorPtToHelm', async () => {
|
|
210
|
-
await _testConvert(Samples.separatorPt, converter(NOTATION.HELM), Samples.helmPt);
|
|
209
|
+
await _testConvert(Samples.separatorPt, converter(bio.NOTATION.HELM), Samples.helmPt);
|
|
211
210
|
});
|
|
212
211
|
test('testSeparatorDnaToHelm', async () => {
|
|
213
|
-
await _testConvert(Samples.separatorDna, converter(NOTATION.HELM), Samples.helmDna);
|
|
212
|
+
await _testConvert(Samples.separatorDna, converter(bio.NOTATION.HELM), Samples.helmDna);
|
|
214
213
|
});
|
|
215
214
|
test('testSeparatorRnaToHelm', async () => {
|
|
216
|
-
await _testConvert(Samples.separatorRna, converter(NOTATION.HELM), Samples.helmRna);
|
|
215
|
+
await _testConvert(Samples.separatorRna, converter(bio.NOTATION.HELM), Samples.helmRna);
|
|
217
216
|
});
|
|
218
217
|
test('testSeparatorGapsToHelm', async () => {
|
|
219
|
-
await _testConvert(Samples.separatorGaps, converter(NOTATION.HELM), Samples.helmGaps);
|
|
218
|
+
await _testConvert(Samples.separatorGaps, converter(bio.NOTATION.HELM), Samples.helmGaps);
|
|
220
219
|
});
|
|
221
220
|
|
|
222
221
|
|
|
223
222
|
// HELM tests
|
|
224
223
|
// helm -> fasta
|
|
225
224
|
test('testHelmDnaToFasta', async () => {
|
|
226
|
-
await _testConvert(Samples.helmDna, converter(NOTATION.FASTA), Samples.fastaDna);
|
|
225
|
+
await _testConvert(Samples.helmDna, converter(bio.NOTATION.FASTA), Samples.fastaDna);
|
|
227
226
|
});
|
|
228
227
|
test('testHelmRnaToFasta', async () => {
|
|
229
|
-
await _testConvert(Samples.helmRna, converter(NOTATION.FASTA), Samples.fastaRna);
|
|
228
|
+
await _testConvert(Samples.helmRna, converter(bio.NOTATION.FASTA), Samples.fastaRna);
|
|
230
229
|
});
|
|
231
230
|
test('testHelmPtToFasta', async () => {
|
|
232
|
-
await _testConvert(Samples.helmPt, converter(NOTATION.FASTA), Samples.fastaPt);
|
|
231
|
+
await _testConvert(Samples.helmPt, converter(bio.NOTATION.FASTA), Samples.fastaPt);
|
|
233
232
|
});
|
|
234
233
|
|
|
235
234
|
// helm -> separator
|
|
236
235
|
test('testHelmDnaToSeparator', async () => {
|
|
237
|
-
await _testConvert(Samples.helmDna, converter(NOTATION.SEPARATOR, '/'), Samples.separatorDna);
|
|
236
|
+
await _testConvert(Samples.helmDna, converter(bio.NOTATION.SEPARATOR, '/'), Samples.separatorDna);
|
|
238
237
|
});
|
|
239
238
|
test('testHelmRnaToSeparator', async () => {
|
|
240
|
-
await _testConvert(Samples.helmRna, converter(NOTATION.SEPARATOR, '*'), Samples.separatorRna);
|
|
239
|
+
await _testConvert(Samples.helmRna, converter(bio.NOTATION.SEPARATOR, '*'), Samples.separatorRna);
|
|
241
240
|
});
|
|
242
241
|
test('testHelmPtToSeparator', async () => {
|
|
243
|
-
await _testConvert(Samples.helmPt, converter(NOTATION.SEPARATOR, '-'), Samples.separatorPt);
|
|
242
|
+
await _testConvert(Samples.helmPt, converter(bio.NOTATION.SEPARATOR, '-'), Samples.separatorPt);
|
|
244
243
|
});
|
|
245
244
|
|
|
246
245
|
// helm miscellaneous
|
|
247
246
|
test('testHelmLoneRibose', async () => {
|
|
248
|
-
await _testConvert(Samples.helmLoneRibose, converter(NOTATION.FASTA), Samples.fastaRna);
|
|
247
|
+
await _testConvert(Samples.helmLoneRibose, converter(bio.NOTATION.FASTA), Samples.fastaRna);
|
|
249
248
|
});
|
|
250
249
|
test('testHelmLoneDeoxyribose', async () => {
|
|
251
|
-
await _testConvert(Samples.helmLoneDeoxyribose, converter(NOTATION.SEPARATOR, '/'), Samples.separatorDna);
|
|
250
|
+
await _testConvert(Samples.helmLoneDeoxyribose, converter(bio.NOTATION.SEPARATOR, '/'), Samples.separatorDna);
|
|
252
251
|
});
|
|
253
252
|
test('testHelmLonePhosphorus', async () => {
|
|
254
|
-
await _testConvert(Samples.helmLonePhosphorus, converter(NOTATION.FASTA), Samples.fastaRna);
|
|
253
|
+
await _testConvert(Samples.helmLonePhosphorus, converter(bio.NOTATION.FASTA), Samples.fastaRna);
|
|
255
254
|
});
|
|
256
255
|
});
|
|
@@ -1,8 +1,9 @@
|
|
|
1
|
-
import {after, before, category, test, expect, expectObject} from '@datagrok-libraries/utils/src/test';
|
|
2
|
-
|
|
3
1
|
import * as grok from 'datagrok-api/grok';
|
|
4
2
|
import * as ui from 'datagrok-api/ui';
|
|
5
3
|
import * as DG from 'datagrok-api/dg';
|
|
4
|
+
import * as bio from '@datagrok-libraries/bio';
|
|
5
|
+
|
|
6
|
+
import {after, before, category, test, expect, expectObject} from '@datagrok-libraries/utils/src/test';
|
|
6
7
|
|
|
7
8
|
import {importFasta} from '../package';
|
|
8
9
|
import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
@@ -400,10 +401,10 @@ export async function _testPos(
|
|
|
400
401
|
|
|
401
402
|
expect(col.semType === DG.SEMTYPE.MACROMOLECULE, true);
|
|
402
403
|
expect(col.getTag(DG.TAGS.UNITS), units);
|
|
403
|
-
expect(col.getTag(
|
|
404
|
-
expect(col.getTag(
|
|
404
|
+
expect(col.getTag(bio.TAGS.aligned), aligned);
|
|
405
|
+
expect(col.getTag(bio.TAGS.alphabet), alphabet);
|
|
405
406
|
if (separator)
|
|
406
|
-
expect(col.getTag(
|
|
407
|
+
expect(col.getTag(bio.TAGS.separator), separator);
|
|
407
408
|
|
|
408
409
|
const uh = new UnitsHandler(col);
|
|
409
410
|
expect(uh.getAlphabetSize(), alphabetSize);
|
|
@@ -1,10 +1,11 @@
|
|
|
1
|
-
import {after, before, category, delay, expect, test} from '@datagrok-libraries/utils/src/test';
|
|
2
|
-
|
|
3
1
|
import * as grok from 'datagrok-api/grok';
|
|
4
2
|
import * as DG from 'datagrok-api/dg';
|
|
3
|
+
import * as bio from '@datagrok-libraries/bio';
|
|
4
|
+
|
|
5
|
+
import {after, before, category, delay, expect, test} from '@datagrok-libraries/utils/src/test';
|
|
6
|
+
|
|
5
7
|
import {importFasta, multipleSequenceAlignmentAny} from '../package';
|
|
6
8
|
import {convertDo} from '../utils/convert';
|
|
7
|
-
import {ALPHABET, NOTATION, UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
8
9
|
import {SEM_TYPES, TAGS} from '../utils/constants';
|
|
9
10
|
import {generateLongSequence, generateManySequences, performanceTest} from './test-sequnces-generators';
|
|
10
11
|
|
|
@@ -137,22 +138,22 @@ category('renderers', () => {
|
|
|
137
138
|
`semType="${srcSeqCol!.semType}", units="${srcSeqCol!.getTag(DG.TAGS.UNITS)}", ` +
|
|
138
139
|
`cell.renderer="${srcSeqCol!.getTag(DG.TAGS.CELL_RENDERER)}"`);
|
|
139
140
|
expect(srcSeqCol.semType, DG.SEMTYPE.MACROMOLECULE);
|
|
140
|
-
expect(srcSeqCol.getTag(DG.TAGS.UNITS), NOTATION.FASTA);
|
|
141
|
-
expect(srcSeqCol.getTag(
|
|
142
|
-
expect(srcSeqCol.getTag(
|
|
141
|
+
expect(srcSeqCol.getTag(DG.TAGS.UNITS), bio.NOTATION.FASTA);
|
|
142
|
+
expect(srcSeqCol.getTag(bio.TAGS.aligned), 'SEQ');
|
|
143
|
+
expect(srcSeqCol.getTag(bio.TAGS.alphabet), bio.ALPHABET.PT);
|
|
143
144
|
expect(srcSeqCol.getTag(DG.TAGS.CELL_RENDERER), 'sequence');
|
|
144
145
|
|
|
145
146
|
const msaSeqCol: DG.Column = (await multipleSequenceAlignmentAny(df, srcSeqCol!))!;
|
|
146
147
|
tv.grid.invalidate();
|
|
147
148
|
|
|
148
149
|
expect(msaSeqCol.semType, DG.SEMTYPE.MACROMOLECULE);
|
|
149
|
-
expect(msaSeqCol.getTag(DG.TAGS.UNITS), NOTATION.FASTA);
|
|
150
|
-
expect(msaSeqCol.getTag(
|
|
151
|
-
expect(msaSeqCol.getTag(
|
|
150
|
+
expect(msaSeqCol.getTag(DG.TAGS.UNITS), bio.NOTATION.FASTA);
|
|
151
|
+
expect(msaSeqCol.getTag(bio.TAGS.aligned), 'SEQ.MSA');
|
|
152
|
+
expect(msaSeqCol.getTag(bio.TAGS.alphabet), bio.ALPHABET.PT);
|
|
152
153
|
expect(msaSeqCol.getTag(DG.TAGS.CELL_RENDERER), 'sequence');
|
|
153
154
|
|
|
154
155
|
// check newColumn with UnitsHandler constructor
|
|
155
|
-
const uh: UnitsHandler = new UnitsHandler(msaSeqCol);
|
|
156
|
+
const uh: bio.UnitsHandler = new bio.UnitsHandler(msaSeqCol);
|
|
156
157
|
|
|
157
158
|
dfList.push(df);
|
|
158
159
|
tvList.push(tv);
|
|
@@ -174,13 +175,13 @@ category('renderers', () => {
|
|
|
174
175
|
tvList.push(tv);
|
|
175
176
|
dfList.push(df);
|
|
176
177
|
|
|
177
|
-
const tgtCol: DG.Column = await convertDo(srcCol, NOTATION.SEPARATOR, '/');
|
|
178
|
+
const tgtCol: DG.Column = await convertDo(srcCol, bio.NOTATION.SEPARATOR, '/');
|
|
178
179
|
|
|
179
180
|
const resCellRenderer = tgtCol.getTag(DG.TAGS.CELL_RENDERER);
|
|
180
181
|
expect(resCellRenderer, 'sequence');
|
|
181
182
|
|
|
182
183
|
// check tgtCol with UnitsHandler constructor
|
|
183
|
-
const uh: UnitsHandler = new UnitsHandler(tgtCol);
|
|
184
|
+
const uh: bio.UnitsHandler = new bio.UnitsHandler(tgtCol);
|
|
184
185
|
}
|
|
185
186
|
|
|
186
187
|
async function _selectRendererBySemType() {
|
|
@@ -7,23 +7,6 @@ import * as C from './constants';
|
|
|
7
7
|
const undefinedColor = 'rgb(100,100,100)';
|
|
8
8
|
const monomerToShortFunction: (amino: string, maxLengthOfMonomer: number) => string = bio.monomerToShort;
|
|
9
9
|
|
|
10
|
-
|
|
11
|
-
function getPaletteByType(paletteType: string): bio.SeqPalette {
|
|
12
|
-
switch (paletteType) {
|
|
13
|
-
case 'PT':
|
|
14
|
-
return bio.AminoacidsPalettes.GrokGroups;
|
|
15
|
-
case 'NT':
|
|
16
|
-
return bio.NucleotidesPalettes.Chromatogram;
|
|
17
|
-
case 'DNA':
|
|
18
|
-
return bio.NucleotidesPalettes.Chromatogram;
|
|
19
|
-
case 'RNA':
|
|
20
|
-
return bio.NucleotidesPalettes.Chromatogram;
|
|
21
|
-
// other
|
|
22
|
-
default:
|
|
23
|
-
return bio.UnknownSeqPalettes.Color;
|
|
24
|
-
}
|
|
25
|
-
}
|
|
26
|
-
|
|
27
10
|
function getUpdatedWidth(grid: DG.Grid | null, g: CanvasRenderingContext2D, x: number, w: number): number {
|
|
28
11
|
return grid ? Math.min(grid.canvas.width - x, w) : g.canvas.width - x;
|
|
29
12
|
}
|
|
@@ -61,7 +44,7 @@ export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
|
|
|
61
44
|
}
|
|
62
45
|
|
|
63
46
|
onMouseMove(gridCell: DG.GridCell, e: MouseEvent): void {
|
|
64
|
-
if (gridCell.cell.column.getTag(bio.
|
|
47
|
+
if (gridCell.cell.column.getTag(bio.TAGS.aligned) !== 'SEQ.MSA')
|
|
65
48
|
return;
|
|
66
49
|
|
|
67
50
|
const maxLengthWordsSum = gridCell.cell.column.temp['bio-sum-maxLengthWords'];
|
|
@@ -126,7 +109,7 @@ export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
|
|
|
126
109
|
//TODO: can this be replaced/merged with splitSequence?
|
|
127
110
|
const units = gridCell.cell.column.getTag(DG.TAGS.UNITS);
|
|
128
111
|
|
|
129
|
-
const palette = getPaletteByType(paletteType);
|
|
112
|
+
const palette = bio.getPaletteByType(paletteType);
|
|
130
113
|
|
|
131
114
|
const separator = gridCell.cell.column.getTag('separator') ?? '';
|
|
132
115
|
const splitLimit = gridCell.bounds.width / 5;
|
|
@@ -227,8 +210,10 @@ export class MonomerCellRenderer extends DG.GridCellRenderer {
|
|
|
227
210
|
g.textBaseline = 'middle';
|
|
228
211
|
g.textAlign = 'center';
|
|
229
212
|
|
|
230
|
-
const palette = getPaletteByType(gridCell.cell.column.getTag(C.TAGS.ALPHABET));
|
|
231
|
-
const s: string = gridCell.cell.value
|
|
213
|
+
const palette = bio.getPaletteByType(gridCell.cell.column.getTag(C.TAGS.ALPHABET));
|
|
214
|
+
const s: string = gridCell.cell.value;
|
|
215
|
+
if (!s)
|
|
216
|
+
return;
|
|
232
217
|
const color = palette.get(s);
|
|
233
218
|
|
|
234
219
|
g.fillStyle = color;
|
|
@@ -271,7 +256,7 @@ export class MacromoleculeDifferenceCellRenderer extends DG.GridCellRenderer {
|
|
|
271
256
|
const splitter = bio.getSplitter(units, separator);
|
|
272
257
|
const subParts1 = splitter(s1);
|
|
273
258
|
const subParts2 = splitter(s2);
|
|
274
|
-
drawMoleculeDifferenceOnCanvas(g, x, y, w, h, subParts1, subParts2, units
|
|
259
|
+
drawMoleculeDifferenceOnCanvas(g, x, y, w, h, subParts1, subParts2, units);
|
|
275
260
|
}
|
|
276
261
|
}
|
|
277
262
|
|
|
@@ -312,7 +297,7 @@ export function drawMoleculeDifferenceOnCanvas(
|
|
|
312
297
|
|
|
313
298
|
let palette: bio.SeqPalette = bio.UnknownSeqPalettes.Color;
|
|
314
299
|
if (units != 'HELM')
|
|
315
|
-
palette = getPaletteByType(units.substring(units.length - 2));
|
|
300
|
+
palette = bio.getPaletteByType(units.substring(units.length - 2));
|
|
316
301
|
|
|
317
302
|
const vShift = 7;
|
|
318
303
|
for (let i = 0; i < subParts1.length; i++) {
|
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
import * as grok from 'datagrok-api/grok';
|
|
3
3
|
import * as ui from 'datagrok-api/ui';
|
|
4
4
|
import * as DG from 'datagrok-api/dg';
|
|
5
|
+
import * as bio from '@datagrok-libraries/bio';
|
|
5
6
|
|
|
6
7
|
import {FastaFileHandler} from '@datagrok-libraries/bio/src/utils/fasta-handler';
|
|
7
8
|
|
|
@@ -58,14 +59,14 @@ export async function runKalign(srcCol: DG.Column, isAligned = false, unUsedName
|
|
|
58
59
|
// units
|
|
59
60
|
const srcUnits = srcCol.getTag(DG.TAGS.UNITS);
|
|
60
61
|
//aligned
|
|
61
|
-
const srcAligned = srcCol.getTag(
|
|
62
|
+
const srcAligned = srcCol.getTag(bio.TAGS.aligned);
|
|
62
63
|
const tgtAligned = srcAligned + '.MSA';
|
|
63
64
|
//alphabet
|
|
64
|
-
const srcAlphabet = srcCol.getTag(
|
|
65
|
+
const srcAlphabet = srcCol.getTag(bio.TAGS.alphabet);
|
|
65
66
|
|
|
66
67
|
tgtCol.setTag(DG.TAGS.UNITS, srcUnits);
|
|
67
|
-
tgtCol.setTag(
|
|
68
|
-
tgtCol.setTag(
|
|
68
|
+
tgtCol.setTag(bio.TAGS.aligned, tgtAligned);
|
|
69
|
+
tgtCol.setTag(bio.TAGS.alphabet, srcAlphabet);
|
|
69
70
|
tgtCol.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
70
71
|
return tgtCol;
|
|
71
72
|
}
|
package/src/utils/utils.ts
CHANGED
|
@@ -19,7 +19,7 @@ export function encodeMonomers(col: DG.Column): DG.Column | null {
|
|
|
19
19
|
let encodeSymbol = MONOMER_ENCODE_MIN;
|
|
20
20
|
const monomerSymbolDict: { [key: string]: number } = {};
|
|
21
21
|
const units = col.tags[DG.TAGS.UNITS];
|
|
22
|
-
const sep = col.getTag(
|
|
22
|
+
const sep = col.getTag(bio.TAGS.separator);
|
|
23
23
|
const splitterFunc: bio.SplitterFunc = bio.getSplitter(units, sep);
|
|
24
24
|
const encodedStringArray = [];
|
|
25
25
|
for (let i = 0; i < col.length; ++i) {
|