@datagrok/bio 2.0.25 → 2.0.27
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/detectors.js +7 -2
- package/dist/package-test.js +58854 -1599
- package/dist/package.js +58891 -1447
- package/files/tests/filter_FASTA.csv +14 -0
- package/files/tests/filter_HELM.csv +5 -0
- package/files/tests/filter_MSA.csv +9 -0
- package/files/tests/nucleotidePairs.csv +146690 -0
- package/files/tests/peptidePairs.csv +103685 -0
- package/files/tests/toAtomicLevelTest.csv +7 -0
- package/package.json +6 -7
- package/setup.sh +1 -1
- package/src/calculations/monomerLevelMols.ts +7 -4
- package/src/package-test.ts +2 -1
- package/src/package.ts +12 -18
- package/src/substructure-search/substructure-search.ts +40 -13
- package/src/tests/{WebLogo-test.ts → bio-tests.ts} +16 -2
- package/src/tests/checkInputColumn-tests.ts +6 -7
- package/src/tests/detectors-test.ts +26 -20
- package/src/tests/renderers-test.ts +7 -5
- package/src/tests/substructure-filter-tests.ts +61 -0
- package/src/tests/test-sequnces-generators.ts +8 -6
- package/src/utils/cell-renderer.ts +4 -2
- package/src/widgets/bio-substructure-filter.ts +26 -7
- package/src/widgets/representations.ts +3 -2
- package/src/const.ts +0 -30
- package/src/monomer-library.ts +0 -199
- package/src/utils/utils.ts +0 -135
package/package.json
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
"name": "Leonid Stolbov",
|
|
6
6
|
"email": "lstolbov@datagrok.ai"
|
|
7
7
|
},
|
|
8
|
-
"version": "2.0.
|
|
8
|
+
"version": "2.0.27",
|
|
9
9
|
"description": "Bio is a [package](https://datagrok.ai/help/develop/develop#packages) for the [Datagrok](https://datagrok.ai) platform",
|
|
10
10
|
"repository": {
|
|
11
11
|
"type": "git",
|
|
@@ -14,15 +14,10 @@
|
|
|
14
14
|
},
|
|
15
15
|
"dependencies": {
|
|
16
16
|
"@biowasm/aioli": "^3.1.0",
|
|
17
|
-
"@datagrok-libraries/bio": "^5.
|
|
17
|
+
"@datagrok-libraries/bio": "^5.5.0",
|
|
18
18
|
"@datagrok-libraries/chem-meta": "1.0.1",
|
|
19
19
|
"@datagrok-libraries/ml": "^6.2.0",
|
|
20
20
|
"@datagrok-libraries/utils": "^1.11.1",
|
|
21
|
-
"@deck.gl/core": "^8.7.5",
|
|
22
|
-
"@deck.gl/layers": "^8.7.5",
|
|
23
|
-
"@luma.gl/constants": "^8.5.10",
|
|
24
|
-
"@luma.gl/core": "^8.5.10",
|
|
25
|
-
"@phylocanvas/phylocanvas.gl": "^1.44.0",
|
|
26
21
|
"cash-dom": "^8.0.0",
|
|
27
22
|
"datagrok-api": "^1.7.0",
|
|
28
23
|
"dayjs": "^1.11.4",
|
|
@@ -51,6 +46,10 @@
|
|
|
51
46
|
"webpack": "^5.64.1",
|
|
52
47
|
"webpack-cli": "^4.6.0"
|
|
53
48
|
},
|
|
49
|
+
"grokDependencies": {
|
|
50
|
+
"@datagrok/chem": "1.3.16",
|
|
51
|
+
"@datagrok/helm": "latest"
|
|
52
|
+
},
|
|
54
53
|
"scripts": {
|
|
55
54
|
"link-api": "npm link datagrok-api",
|
|
56
55
|
"link-bio": "npm link @datagrok-libraries/bio",
|
package/setup.sh
CHANGED
|
@@ -8,17 +8,20 @@ import {getHelmMonomers} from '../package';
|
|
|
8
8
|
|
|
9
9
|
const V2000_ATOM_NAME_POS = 31;
|
|
10
10
|
|
|
11
|
-
export async function getMonomericMols(mcol: DG.Column,
|
|
11
|
+
export async function getMonomericMols(mcol: DG.Column,
|
|
12
|
+
pattern: boolean = false, monomersDict?: Map<string, string>): Promise<DG.Column> {
|
|
12
13
|
const separator: string = mcol.tags[C.TAGS.SEPARATOR];
|
|
13
14
|
const units: string = mcol.tags[DG.TAGS.UNITS];
|
|
14
15
|
const splitter = bio.getSplitter(units, separator);
|
|
15
16
|
let molV3000Array;
|
|
16
|
-
|
|
17
|
+
monomersDict ??= new Map();
|
|
17
18
|
const monomers = units === 'helm' ?
|
|
18
19
|
getHelmMonomers(mcol) : Object.keys(bio.getStats(mcol, 0, splitter).freq).filter((it) => it !== '');
|
|
19
20
|
|
|
20
|
-
for (let i = 0; i < monomers.length; i++)
|
|
21
|
-
monomersDict.
|
|
21
|
+
for (let i = 0; i < monomers.length; i++) {
|
|
22
|
+
if (!monomersDict.has(monomers[i]))
|
|
23
|
+
monomersDict.set(monomers[i], `${monomersDict.size + 1}`);
|
|
24
|
+
}
|
|
22
25
|
|
|
23
26
|
if (units === 'helm') {
|
|
24
27
|
molV3000Array = await grok.functions.call('HELM:getMolFiles', {col: mcol});
|
package/src/package-test.ts
CHANGED
|
@@ -2,7 +2,6 @@ import * as DG from 'datagrok-api/dg';
|
|
|
2
2
|
|
|
3
3
|
import {runTests, TestContext, tests} from '@datagrok-libraries/utils/src/test';
|
|
4
4
|
|
|
5
|
-
import './tests/WebLogo-test';
|
|
6
5
|
import './tests/Palettes-test';
|
|
7
6
|
import './tests/detectors-test';
|
|
8
7
|
import './tests/msa-tests';
|
|
@@ -13,9 +12,11 @@ import './tests/renderers-test';
|
|
|
13
12
|
import './tests/convert-test';
|
|
14
13
|
import './tests/fasta-handler-test';
|
|
15
14
|
import './tests/fasta-export-tests';
|
|
15
|
+
import './tests/bio-tests';
|
|
16
16
|
import './tests/WebLogo-positions-test';
|
|
17
17
|
import './tests/checkInputColumn-tests';
|
|
18
18
|
import './tests/similarity-diversity-tests';
|
|
19
|
+
import './tests/substructure-filter-tests';
|
|
19
20
|
|
|
20
21
|
export const _package = new DG.Package();
|
|
21
22
|
export {tests};
|
package/src/package.ts
CHANGED
|
@@ -13,11 +13,16 @@ import {SequenceAlignment, Aligned} from './seq_align';
|
|
|
13
13
|
import {getEmbeddingColsNames, sequenceSpace} from './analysis/sequence-space';
|
|
14
14
|
import {getActivityCliffs} from '@datagrok-libraries/ml/src/viewers/activity-cliffs';
|
|
15
15
|
import {createPropPanelElement, createTooltipElement, getSimilaritiesMarix} from './analysis/sequence-activity-cliffs';
|
|
16
|
-
import {createJsonMonomerLibFromSdf, encodeMonomers, getMolfilesFromSeq
|
|
16
|
+
import {createJsonMonomerLibFromSdf, encodeMonomers, getMolfilesFromSeq} from '@datagrok-libraries/bio/src/utils/monomer-utils';
|
|
17
|
+
import {HELM_CORE_LIB_FILENAME} from '@datagrok-libraries/bio/src/utils/const';
|
|
17
18
|
import {getMacroMol} from './utils/atomic-works';
|
|
18
19
|
import {MacromoleculeSequenceCellRenderer} from './utils/cell-renderer';
|
|
19
20
|
import {convert} from './utils/convert';
|
|
20
21
|
import {getMacroMolColumnPropertyPanel, representationsWidget} from './widgets/representations';
|
|
22
|
+
import {TAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
23
|
+
import {ALPHABET, NOTATION} from '@datagrok-libraries/bio/src/utils/macromolecule'
|
|
24
|
+
import {_toAtomicLevel} from '@datagrok-libraries/bio/src/utils/to-atomic-level';
|
|
25
|
+
import {FastaFileHandler} from '@datagrok-libraries/bio/src/utils/fasta-handler';
|
|
21
26
|
import {removeEmptyStringRows} from '@datagrok-libraries/utils/src/dataframe-utils';
|
|
22
27
|
import {
|
|
23
28
|
generateManySequences,
|
|
@@ -99,7 +104,7 @@ export function checkInputColumn(
|
|
|
99
104
|
) {
|
|
100
105
|
const notationAdd = allowedNotations.length == 0 ? 'any notation' :
|
|
101
106
|
(`notation${allowedNotations.length > 1 ? 's' : ''} ${allowedNotations.map((n) => `"${n}"`).join(', ')} `);
|
|
102
|
-
msg = `${name} analysis is allowed for Macromolecules with ${notationAdd}.`;
|
|
107
|
+
msg = `${name} + ' analysis is allowed for Macromolecules with notation ${notationAdd}.`;
|
|
103
108
|
res = false;
|
|
104
109
|
} else if (!uh.isHelm()) {
|
|
105
110
|
// alphabet is not specified for 'helm' notation
|
|
@@ -169,10 +174,10 @@ export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column,
|
|
|
169
174
|
'SPE': {cycles: 2000, lambda: 1.0, dlambda: 0.0005},
|
|
170
175
|
};
|
|
171
176
|
const tags = {
|
|
172
|
-
'units': macroMolecule.
|
|
173
|
-
'aligned': macroMolecule.
|
|
174
|
-
'separator': macroMolecule.
|
|
175
|
-
'alphabet': macroMolecule.
|
|
177
|
+
'units': macroMolecule.getTag(DG.TAGS.UNITS),
|
|
178
|
+
'aligned': macroMolecule.getTag(TAGS.aligned),
|
|
179
|
+
'separator': macroMolecule.getTag(TAGS.separator),
|
|
180
|
+
'alphabet': macroMolecule.getTag(TAGS.alphabet),
|
|
176
181
|
};
|
|
177
182
|
const sp = await getActivityCliffs(
|
|
178
183
|
df,
|
|
@@ -247,20 +252,11 @@ export async function toAtomicLevel(df: DG.DataFrame, macroMolecule: DG.Column):
|
|
|
247
252
|
}
|
|
248
253
|
if (!checkInputColumnUi(macroMolecule, 'To Atomic Level'))
|
|
249
254
|
return;
|
|
250
|
-
|
|
251
255
|
const monomersLibFile = await _package.files.readAsText(HELM_CORE_LIB_FILENAME);
|
|
252
256
|
const monomersLibObject: any[] = JSON.parse(monomersLibFile);
|
|
253
|
-
|
|
254
|
-
const result = await getMacroMol(atomicCodes!);
|
|
255
|
-
|
|
256
|
-
const col = DG.Column.fromStrings('regenerated', result);
|
|
257
|
-
col.semType = DG.SEMTYPE.MOLECULE;
|
|
258
|
-
col.tags[DG.TAGS.UNITS] = 'molblock';
|
|
259
|
-
df.columns.add(col, true);
|
|
260
|
-
await grok.data.detectSemanticTypes(df);
|
|
257
|
+
_toAtomicLevel(df, macroMolecule, monomersLibObject);
|
|
261
258
|
}
|
|
262
259
|
|
|
263
|
-
|
|
264
260
|
//top-menu: Bio | MSA...
|
|
265
261
|
//name: MSA
|
|
266
262
|
//input: dataframe table
|
|
@@ -528,5 +524,3 @@ export function saveAsFasta() {
|
|
|
528
524
|
export function bioSubstructureFilter(): BioSubstructureFilter {
|
|
529
525
|
return new BioSubstructureFilter();
|
|
530
526
|
}
|
|
531
|
-
|
|
532
|
-
|
|
@@ -8,6 +8,14 @@ import {getMonomericMols} from '../calculations/monomerLevelMols';
|
|
|
8
8
|
import {BitSet} from 'datagrok-api/dg';
|
|
9
9
|
import {updateDivInnerHTML} from '../utils/ui-utils';
|
|
10
10
|
|
|
11
|
+
export const MONOMER_MOLS_COL = 'monomeric-mols';
|
|
12
|
+
|
|
13
|
+
const enum MONOMERIC_COL_TAGS{
|
|
14
|
+
MONOMERIC_MOLS = 'monomeric-mols',
|
|
15
|
+
LAST_INVALIDATED_VERSION = 'last-invalidated-version',
|
|
16
|
+
MONOMERS_DICT = 'monomers-dict'
|
|
17
|
+
}
|
|
18
|
+
|
|
11
19
|
/**
|
|
12
20
|
* Searches substructure in each row of Macromolecule column
|
|
13
21
|
*
|
|
@@ -67,29 +75,48 @@ export function substructureSearchDialog(col: DG.Column): void {
|
|
|
67
75
|
.show();
|
|
68
76
|
}
|
|
69
77
|
|
|
70
|
-
export function linearSubstructureSearch(substructure: string, col: DG.Column): DG.BitSet {
|
|
71
|
-
const
|
|
78
|
+
export function linearSubstructureSearch(substructure: string, col: DG.Column, separator?: string): DG.BitSet {
|
|
79
|
+
const re = separator ? prepareSubstructureRegex(substructure, separator) : substructure;
|
|
72
80
|
const resultArray = DG.BitSet.create(col.length);
|
|
73
81
|
for (let i = 0; i < col.length; i++) {
|
|
74
|
-
const macromolecule = col.get(i)
|
|
75
|
-
if (macromolecule.
|
|
82
|
+
const macromolecule = col.get(i);
|
|
83
|
+
if (macromolecule.match(re) || macromolecule === substructure)
|
|
76
84
|
resultArray.set(i, true, false);
|
|
77
85
|
}
|
|
78
86
|
return resultArray;
|
|
79
87
|
}
|
|
80
88
|
|
|
89
|
+
function prepareSubstructureRegex(substructure: string, separator: string) {
|
|
90
|
+
const char = `${separator}`.replace(/[\-\[\]\/\{\}\(\)\*\+\?\.\\\^\$\|]/g, '\\$&');
|
|
91
|
+
const startsWithSep = substructure.charAt(0) === separator;
|
|
92
|
+
const endsWithSep = substructure.charAt(substructure.length - 1) === separator;
|
|
93
|
+
const substrWithoutSep = substructure.replace(new RegExp(`^${char}|${char}$`, 'g'), '');
|
|
94
|
+
const re = startsWithSep ? endsWithSep ? `${char}${substrWithoutSep}${char}` :
|
|
95
|
+
`${char}${substrWithoutSep}${char}|${char}${substrWithoutSep}$` :
|
|
96
|
+
endsWithSep ? `^${substrWithoutSep}${char}|${char}${substrWithoutSep}${char}` :
|
|
97
|
+
`^${substrWithoutSep}${char}|${char}${substrWithoutSep}${char}|${char}${substrWithoutSep}$`;
|
|
98
|
+
return re;
|
|
99
|
+
}
|
|
100
|
+
|
|
81
101
|
export async function helmSubstructureSearch(substructure: string, col: DG.Column): Promise<BitSet> {
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
const
|
|
87
|
-
|
|
88
|
-
monomericMolsDf.rows.removeAt(col.length);
|
|
102
|
+
if (col.version !== col.temp[MONOMERIC_COL_TAGS.LAST_INVALIDATED_VERSION])
|
|
103
|
+
await invalidateHelmMols(col);
|
|
104
|
+
const substructureCol = DG.Column.string('helm', 1).init((i) => substructure);
|
|
105
|
+
substructureCol.setTag(DG.TAGS.UNITS, bio.NOTATION.HELM);
|
|
106
|
+
const substructureMolsCol =
|
|
107
|
+
await getMonomericMols(substructureCol, true, col.temp[MONOMERIC_COL_TAGS.MONOMERS_DICT]);
|
|
89
108
|
const matchesCol = await grok.functions.call('Chem:searchSubstructure', {
|
|
90
|
-
molStringsColumn:
|
|
91
|
-
molString:
|
|
109
|
+
molStringsColumn: col.temp[MONOMERIC_COL_TAGS.MONOMERIC_MOLS],
|
|
110
|
+
molString: substructureMolsCol.get(0),
|
|
92
111
|
molBlockFailover: '',
|
|
93
112
|
});
|
|
94
113
|
return matchesCol.get(0);
|
|
95
114
|
}
|
|
115
|
+
|
|
116
|
+
export async function invalidateHelmMols(col: DG.Column) {
|
|
117
|
+
const monomersDict = new Map();
|
|
118
|
+
const monomericMolsCol = await getMonomericMols(col, true, monomersDict);
|
|
119
|
+
col.temp[MONOMERIC_COL_TAGS.MONOMERIC_MOLS] = monomericMolsCol;
|
|
120
|
+
col.temp[MONOMERIC_COL_TAGS.MONOMERS_DICT] = monomersDict;
|
|
121
|
+
col.temp[MONOMERIC_COL_TAGS.LAST_INVALIDATED_VERSION] = col.version;
|
|
122
|
+
}
|
|
@@ -5,7 +5,7 @@ import * as bio from '@datagrok-libraries/bio';
|
|
|
5
5
|
|
|
6
6
|
import {after, before, category, test, expect, expectObject} from '@datagrok-libraries/utils/src/test';
|
|
7
7
|
|
|
8
|
-
category('
|
|
8
|
+
category('bio', () => {
|
|
9
9
|
const csvDfN1: string = `seq
|
|
10
10
|
ACGTCT
|
|
11
11
|
CAGTGT
|
|
@@ -47,7 +47,21 @@ XZJ{}2
|
|
|
47
47
|
`;
|
|
48
48
|
|
|
49
49
|
// anonymous functions specified in test() registering must return Promise<any>
|
|
50
|
-
test('
|
|
50
|
+
test('testGetStatsHelm1', async () => {
|
|
51
|
+
const csv = `seq
|
|
52
|
+
PEPTIDE1{meI}$$$$
|
|
53
|
+
`;
|
|
54
|
+
const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
|
|
55
|
+
const seqCol: DG.Column = df.getCol('seq')!;
|
|
56
|
+
const stats = bio.getStats(seqCol, 1, bio.splitterAsHelm);
|
|
57
|
+
|
|
58
|
+
expectObject(stats.freq, {
|
|
59
|
+
'meI': 1
|
|
60
|
+
});
|
|
61
|
+
expect(stats.sameLength, true);
|
|
62
|
+
});
|
|
63
|
+
|
|
64
|
+
test('testGetStatsN1', async () => { await _testGetStats(csvDfN1); });
|
|
51
65
|
test('testGetAlphabetSimilarity', async () => { await _testGetAlphabetSimilarity(); });
|
|
52
66
|
|
|
53
67
|
test('testPickupPaletteN1', async () => { await _testPickupPaletteN1(csvDfN1); });
|
|
@@ -8,10 +8,9 @@ import {after, before, category, test, expect, expectArray} from '@datagrok-libr
|
|
|
8
8
|
|
|
9
9
|
import {checkInputColumn, multipleSequenceAlignmentAny} from '../package';
|
|
10
10
|
import {UNITS} from 'datagrok-api/dg';
|
|
11
|
-
|
|
11
|
+
import {ALPHABET, NOTATION} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
12
12
|
|
|
13
13
|
category('checkInputColumn', () => {
|
|
14
|
-
|
|
15
14
|
const csv = `seq
|
|
16
15
|
seq1,
|
|
17
16
|
seq2,
|
|
@@ -22,7 +21,7 @@ seq4`;
|
|
|
22
21
|
const func: DG.Func = DG.Func.find({package: 'Bio', name: 'multipleSequenceAlignmentAny'})[0];
|
|
23
22
|
const funcInputColumnProperty: DG.Property = func.inputs.find((i) => i.name == 'sequence')!;
|
|
24
23
|
|
|
25
|
-
|
|
24
|
+
const k = 11;
|
|
26
25
|
|
|
27
26
|
const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
|
|
28
27
|
const col: DG.Column = df.getCol('seq');
|
|
@@ -42,7 +41,7 @@ seq4`;
|
|
|
42
41
|
const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
|
|
43
42
|
const col: DG.Column = df.getCol('seq');
|
|
44
43
|
col.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
45
|
-
col.setTag(DG.TAGS.UNITS,
|
|
44
|
+
col.setTag(DG.TAGS.UNITS, NOTATION.HELM);
|
|
46
45
|
col.setTag(bio.TAGS.alphabetSize, '11');
|
|
47
46
|
col.setTag(bio.TAGS.alphabetIsMultichar, 'true');
|
|
48
47
|
|
|
@@ -57,7 +56,7 @@ seq4`;
|
|
|
57
56
|
const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
|
|
58
57
|
const col: DG.Column = df.getCol('seq');
|
|
59
58
|
col.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
60
|
-
col.setTag(DG.TAGS.UNITS,
|
|
59
|
+
col.setTag(DG.TAGS.UNITS, NOTATION.FASTA);
|
|
61
60
|
col.setTag(bio.TAGS.alphabet, 'UN');
|
|
62
61
|
col.setTag(bio.TAGS.alphabetSize, '11');
|
|
63
62
|
col.setTag(bio.TAGS.alphabetIsMultichar, 'true');
|
|
@@ -73,6 +72,6 @@ seq4`;
|
|
|
73
72
|
test('testGetActionFunctionMeta', async () => {
|
|
74
73
|
const func: DG.Func = DG.Func.find({package: 'Bio', name: 'multipleSequenceAlignmentAny'})[0];
|
|
75
74
|
const sequenceInput: DG.Property = func.inputs.find((i) => i.name == 'sequence')!;
|
|
76
|
-
|
|
75
|
+
const k = 11;
|
|
77
76
|
});
|
|
78
|
-
});
|
|
77
|
+
});
|
|
@@ -6,7 +6,6 @@ import * as bio from '@datagrok-libraries/bio';
|
|
|
6
6
|
import {after, before, category, test, expect, expectObject} from '@datagrok-libraries/utils/src/test';
|
|
7
7
|
|
|
8
8
|
import {importFasta} from '../package';
|
|
9
|
-
import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
10
9
|
|
|
11
10
|
type DfReaderFunc = () => Promise<DG.DataFrame>;
|
|
12
11
|
|
|
@@ -200,44 +199,49 @@ MWRSWY-CKHP
|
|
|
200
199
|
test('NegativeSmiles', async () => { await _testNeg(readCsv('csvDfSmiles', csvDfSmiles), 'col1'); });
|
|
201
200
|
|
|
202
201
|
test('Dna1', async () => {
|
|
203
|
-
await _testPos(readCsv('csvDfDna1', csvDfDna1), 'seq',
|
|
202
|
+
await _testPos(readCsv('csvDfDna1', csvDfDna1), 'seq', bio.NOTATION.FASTA, bio.ALIGNMENT.SEQ, bio.ALPHABET.DNA, 4, false);
|
|
204
203
|
});
|
|
205
204
|
test('Rna1', async () => {
|
|
206
|
-
await _testPos(readCsv('csvDfRna1', csvDfRna1), 'seq',
|
|
205
|
+
await _testPos(readCsv('csvDfRna1', csvDfRna1), 'seq', bio.NOTATION.FASTA, bio.ALIGNMENT.SEQ, bio.ALPHABET.RNA, 4, false);
|
|
207
206
|
});
|
|
208
207
|
test('AA1', async () => {
|
|
209
|
-
await _testPos(readCsv('csvDfPt1', csvDfPt1), 'seq',
|
|
208
|
+
await _testPos(readCsv('csvDfPt1', csvDfPt1), 'seq', bio.NOTATION.FASTA, bio.ALIGNMENT.SEQ, bio.ALPHABET.PT, 20, false);
|
|
210
209
|
});
|
|
211
210
|
test('MsaDna1', async () => {
|
|
212
|
-
await _testPos(readCsv('csvDfMsaDna1', csvDfMsaDna1), 'seq',
|
|
211
|
+
await _testPos(readCsv('csvDfMsaDna1', csvDfMsaDna1), 'seq', bio.NOTATION.FASTA, bio.ALIGNMENT.SEQ_MSA, bio.ALPHABET.DNA, 4, false);
|
|
213
212
|
});
|
|
214
213
|
|
|
215
214
|
test('MsaAA1', async () => {
|
|
216
|
-
await _testPos(readCsv('csvDfMsaPt1', csvDfMsaPt1), 'seq',
|
|
215
|
+
await _testPos(readCsv('csvDfMsaPt1', csvDfMsaPt1), 'seq', bio.NOTATION.FASTA,
|
|
216
|
+
bio.ALIGNMENT.SEQ_MSA, bio.ALPHABET.PT, 20, false);
|
|
217
217
|
});
|
|
218
218
|
|
|
219
219
|
test('SepDna', async () => {
|
|
220
|
-
await _testPos(readCsv('csvDfSepDna', csvDfSepDna), 'seq',
|
|
220
|
+
await _testPos(readCsv('csvDfSepDna', csvDfSepDna), 'seq', bio.NOTATION.SEPARATOR, bio.ALIGNMENT.SEQ, bio.ALPHABET.DNA, 4, false, '*');
|
|
221
221
|
});
|
|
222
222
|
test('SepRna', async () => {
|
|
223
|
-
await _testPos(readCsv('csvDfSepRna', csvDfSepRna), 'seq',
|
|
223
|
+
await _testPos(readCsv('csvDfSepRna', csvDfSepRna), 'seq', bio.NOTATION.SEPARATOR, bio.ALIGNMENT.SEQ, bio.ALPHABET.RNA, 4, false, '*');
|
|
224
224
|
});
|
|
225
225
|
test('SepPt', async () => {
|
|
226
|
-
await _testPos(readCsv('csvDfSepPt', csvDfSepPt), 'seq',
|
|
226
|
+
await _testPos(readCsv('csvDfSepPt', csvDfSepPt), 'seq',
|
|
227
|
+
bio.NOTATION.SEPARATOR, bio.ALIGNMENT.SEQ, bio.ALPHABET.PT, 20, false, '-');
|
|
227
228
|
});
|
|
228
229
|
test('SepUn1', async () => {
|
|
229
|
-
await _testPos(readCsv('csvDfSepUn1', csvDfSepUn1), 'seq',
|
|
230
|
+
await _testPos(readCsv('csvDfSepUn1', csvDfSepUn1), 'seq',
|
|
231
|
+
bio.NOTATION.SEPARATOR, bio.ALIGNMENT.SEQ, bio.ALPHABET.UN, 8, true, '-');
|
|
230
232
|
});
|
|
231
233
|
test('SepUn2', async () => {
|
|
232
|
-
await _testPos(readCsv('csvDfSepUn2', csvDfSepUn2), 'seq',
|
|
234
|
+
await _testPos(readCsv('csvDfSepUn2', csvDfSepUn2), 'seq',
|
|
235
|
+
bio.NOTATION.SEPARATOR, bio.ALIGNMENT.SEQ, bio.ALPHABET.UN, 9, true, '/');
|
|
233
236
|
});
|
|
234
237
|
|
|
235
238
|
test('SepMsaN1', async () => {
|
|
236
|
-
await _testPos(readCsv('csvDfSepMsaDna1', csvDfSepMsaDna1), 'seq',
|
|
239
|
+
await _testPos(readCsv('csvDfSepMsaDna1', csvDfSepMsaDna1), 'seq',
|
|
240
|
+
bio.NOTATION.SEPARATOR, bio.ALIGNMENT.SEQ_MSA, bio.ALPHABET.DNA, 4, false, '-');
|
|
237
241
|
});
|
|
238
242
|
|
|
239
243
|
test('SamplesFastaCsvPt', async () => {
|
|
240
|
-
await _testPos(readSamples(Samples.fastaCsv), 'sequence',
|
|
244
|
+
await _testPos(readSamples(Samples.fastaCsv), 'sequence', bio.NOTATION.FASTA, bio.ALIGNMENT.SEQ, bio.ALPHABET.PT, 20, false);
|
|
241
245
|
});
|
|
242
246
|
test('SamplesFastaCsvNegativeEntry', async () => {
|
|
243
247
|
await _testNeg(readSamples(Samples.fastaCsv), 'Entry');
|
|
@@ -250,7 +254,8 @@ MWRSWY-CKHP
|
|
|
250
254
|
});
|
|
251
255
|
|
|
252
256
|
test('SamplesFastaFastaPt', async () => {
|
|
253
|
-
await _testPos(readSamples(Samples.fastaFasta, readFileFasta),
|
|
257
|
+
await _testPos(readSamples(Samples.fastaFasta, readFileFasta),
|
|
258
|
+
'sequence', bio.NOTATION.FASTA, bio.ALIGNMENT.SEQ, bio.ALPHABET.PT, 20, false);
|
|
254
259
|
});
|
|
255
260
|
|
|
256
261
|
// peptidesComplex contains monomers with spaces in AlignedSequence columns, which are forbidden
|
|
@@ -268,7 +273,8 @@ MWRSWY-CKHP
|
|
|
268
273
|
});
|
|
269
274
|
|
|
270
275
|
test('samplesMsaComplexUn', async () => {
|
|
271
|
-
await _testPos(readSamples(Samples.msaComplex), 'MSA',
|
|
276
|
+
await _testPos(readSamples(Samples.msaComplex), 'MSA',
|
|
277
|
+
bio.NOTATION.SEPARATOR, bio.ALIGNMENT.SEQ_MSA, bio.ALPHABET.UN, 161, true, '/');
|
|
272
278
|
});
|
|
273
279
|
test('samplesMsaComplexNegativeActivity', async () => {
|
|
274
280
|
await _testNeg(readSamples(Samples.msaComplex), 'Activity');
|
|
@@ -283,7 +289,7 @@ MWRSWY-CKHP
|
|
|
283
289
|
});
|
|
284
290
|
|
|
285
291
|
test('samplesHelmCsvHELM', async () => {
|
|
286
|
-
await _testPos(readSamples(Samples.helmCsv), 'HELM',
|
|
292
|
+
await _testPos(readSamples(Samples.helmCsv), 'HELM', bio.NOTATION.HELM, null, null, 160, true, null);
|
|
287
293
|
});
|
|
288
294
|
|
|
289
295
|
test('samplesHelmCsvNegativeActivity', async () => {
|
|
@@ -299,7 +305,7 @@ MWRSWY-CKHP
|
|
|
299
305
|
await _testNeg(readSamples(Samples.testHelmCsv), 'Test type');
|
|
300
306
|
});
|
|
301
307
|
test('samplesTestHelmPositiveHelmString', async () => {
|
|
302
|
-
await _testPos(readSamples(Samples.testHelmCsv), 'HELM string',
|
|
308
|
+
await _testPos(readSamples(Samples.testHelmCsv), 'HELM string', bio.NOTATION.HELM, null, null, 9, true, null);
|
|
303
309
|
});
|
|
304
310
|
test('samplesTestHelmNegativeValid', async () => {
|
|
305
311
|
await _testNeg(readSamples(Samples.testHelmCsv), 'Valid?');
|
|
@@ -331,7 +337,7 @@ MWRSWY-CKHP
|
|
|
331
337
|
});
|
|
332
338
|
|
|
333
339
|
test('samplesFastaPtPosSequence', async () => {
|
|
334
|
-
await _testPos(readSamples(Samples.fastaPtCsv), 'sequence',
|
|
340
|
+
await _testPos(readSamples(Samples.fastaPtCsv), 'sequence', bio.NOTATION.FASTA, bio.ALIGNMENT.SEQ, bio.ALPHABET.PT, 20, false);
|
|
335
341
|
});
|
|
336
342
|
|
|
337
343
|
test('samplesTestCerealNegativeCerealName', async () => {
|
|
@@ -383,7 +389,7 @@ export async function _testNeg(readDf: DfReaderFunc, colName: string) {
|
|
|
383
389
|
throw new Error(msg);
|
|
384
390
|
// col.semType = '';
|
|
385
391
|
// col.setTag(DG.TAGS.UNITS, '');
|
|
386
|
-
// col.setTag(
|
|
392
|
+
// col.setTag(NOTATION.SEPARATOR, '');
|
|
387
393
|
}
|
|
388
394
|
}
|
|
389
395
|
|
|
@@ -406,7 +412,7 @@ export async function _testPos(
|
|
|
406
412
|
if (separator)
|
|
407
413
|
expect(col.getTag(bio.TAGS.separator), separator);
|
|
408
414
|
|
|
409
|
-
const uh = new UnitsHandler(col);
|
|
415
|
+
const uh = new bio.UnitsHandler(col);
|
|
410
416
|
expect(uh.getAlphabetSize(), alphabetSize);
|
|
411
417
|
expect(uh.getAlphabetIsMultichar(), alphabetIsMultichar);
|
|
412
418
|
if (!uh.isHelm()) {
|
|
@@ -103,7 +103,7 @@ category('renderers', () => {
|
|
|
103
103
|
async function _rendererMacromoleculeDifference() {
|
|
104
104
|
const seqDiffCol: DG.Column = DG.Column.fromStrings('SequencesDiff',
|
|
105
105
|
['meI/hHis/Aca/N/T/dK/Thr_PO3H2/Aca#D-Tyr_Et/Tyr_ab-dehydroMe/meN/E/N/dV']);
|
|
106
|
-
seqDiffCol.tags[DG.TAGS.UNITS] =
|
|
106
|
+
seqDiffCol.tags[DG.TAGS.UNITS] = bio.NOTATION.SEPARATOR;
|
|
107
107
|
seqDiffCol.tags[TAGS.SEPARATOR] = '/';
|
|
108
108
|
seqDiffCol.semType = SEM_TYPES.MACROMOLECULE_DIFFERENCE;
|
|
109
109
|
const df = DG.DataFrame.fromColumns([seqDiffCol]);
|
|
@@ -139,7 +139,7 @@ category('renderers', () => {
|
|
|
139
139
|
`cell.renderer="${srcSeqCol!.getTag(DG.TAGS.CELL_RENDERER)}"`);
|
|
140
140
|
expect(srcSeqCol.semType, DG.SEMTYPE.MACROMOLECULE);
|
|
141
141
|
expect(srcSeqCol.getTag(DG.TAGS.UNITS), bio.NOTATION.FASTA);
|
|
142
|
-
expect(srcSeqCol.getTag(bio.TAGS.aligned),
|
|
142
|
+
expect(srcSeqCol.getTag(bio.TAGS.aligned), bio.ALIGNMENT.SEQ);
|
|
143
143
|
expect(srcSeqCol.getTag(bio.TAGS.alphabet), bio.ALPHABET.PT);
|
|
144
144
|
expect(srcSeqCol.getTag(DG.TAGS.CELL_RENDERER), 'sequence');
|
|
145
145
|
|
|
@@ -148,7 +148,7 @@ category('renderers', () => {
|
|
|
148
148
|
|
|
149
149
|
expect(msaSeqCol.semType, DG.SEMTYPE.MACROMOLECULE);
|
|
150
150
|
expect(msaSeqCol.getTag(DG.TAGS.UNITS), bio.NOTATION.FASTA);
|
|
151
|
-
expect(msaSeqCol.getTag(bio.TAGS.aligned),
|
|
151
|
+
expect(msaSeqCol.getTag(bio.TAGS.aligned), bio.ALIGNMENT.SEQ_MSA);
|
|
152
152
|
expect(msaSeqCol.getTag(bio.TAGS.alphabet), bio.ALPHABET.PT);
|
|
153
153
|
expect(msaSeqCol.getTag(DG.TAGS.CELL_RENDERER), 'sequence');
|
|
154
154
|
|
|
@@ -190,7 +190,7 @@ category('renderers', () => {
|
|
|
190
190
|
/**/
|
|
191
191
|
const seqDiffCol: DG.Column = DG.Column.fromStrings('SequencesDiff',
|
|
192
192
|
['meI/hHis/Aca/N/T/dK/Thr_PO3H2/Aca#D-Tyr_Et/Tyr_ab-dehydroMe/meN/E/N/dV']);
|
|
193
|
-
seqDiffCol.tags[DG.TAGS.UNITS] =
|
|
193
|
+
seqDiffCol.tags[DG.TAGS.UNITS] = bio.NOTATION.SEPARATOR;
|
|
194
194
|
seqDiffCol.tags[TAGS.SEPARATOR] = '/';
|
|
195
195
|
seqDiffCol.semType = SEM_TYPES.MACROMOLECULE_DIFFERENCE;
|
|
196
196
|
const df = DG.DataFrame.fromColumns([seqDiffCol]);
|
|
@@ -206,10 +206,12 @@ category('renderers', () => {
|
|
|
206
206
|
`view renderer has set to '${renderer}' instead of correct 'MacromoleculeDifference'.`);
|
|
207
207
|
}
|
|
208
208
|
|
|
209
|
+
/** GROK-11212 Do not overwrite / recalculate 'cell.renderer' tag that has been set programmatically
|
|
210
|
+
* https://reddata.atlassian.net/browse/GROK-11212 */
|
|
209
211
|
async function _setRendererManually() {
|
|
210
212
|
const seqDiffCol: DG.Column = DG.Column.fromStrings('SequencesDiff',
|
|
211
213
|
['meI/hHis/Aca/N/T/dK/Thr_PO3H2/Aca#D-Tyr_Et/Tyr_ab-dehydroMe/meN/E/N/dV']);
|
|
212
|
-
seqDiffCol.tags[DG.TAGS.UNITS] =
|
|
214
|
+
seqDiffCol.tags[DG.TAGS.UNITS] = bio.NOTATION.SEPARATOR;
|
|
213
215
|
seqDiffCol.tags[TAGS.SEPARATOR] = '/';
|
|
214
216
|
seqDiffCol.semType = SEM_TYPES.MACROMOLECULE;
|
|
215
217
|
const tgtCellRenderer = 'MacromoleculeDifference';
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
import {after, before, category, test, expect, delay} from '@datagrok-libraries/utils/src/test';
|
|
2
|
+
import * as DG from 'datagrok-api/dg';
|
|
3
|
+
import * as grok from 'datagrok-api/grok';
|
|
4
|
+
import {readDataframe} from './utils';
|
|
5
|
+
import {BioSubstructureFilter, HelmFilter, SeparatorFilter} from '../widgets/bio-substructure-filter';
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
category('substructureFilters', async () => {
|
|
9
|
+
test('fasta', async () => {
|
|
10
|
+
const fasta = await readDataframe('tests/filter_FASTA.csv');
|
|
11
|
+
const filter = new BioSubstructureFilter();
|
|
12
|
+
await grok.data.detectSemanticTypes(fasta);
|
|
13
|
+
filter.attach(fasta);
|
|
14
|
+
filter.bioFilter!.substructure = 'MD';
|
|
15
|
+
await delay(100);
|
|
16
|
+
expect(filter.dataFrame!.filter.trueCount, 3);
|
|
17
|
+
expect(filter.dataFrame!.filter.get(0), true);
|
|
18
|
+
expect(filter.dataFrame!.filter.get(3), true);
|
|
19
|
+
expect(filter.dataFrame!.filter.get(8), true);
|
|
20
|
+
expect(filter.dataFrame!.filter.get(1), false);
|
|
21
|
+
});
|
|
22
|
+
|
|
23
|
+
test('separator', async () => {
|
|
24
|
+
const msa = await readDataframe('tests/filter_MSA.csv');
|
|
25
|
+
const filter = new BioSubstructureFilter();
|
|
26
|
+
await grok.data.detectSemanticTypes(msa);
|
|
27
|
+
filter.attach(msa);
|
|
28
|
+
filter.bioFilter!.substructure = 'meI';
|
|
29
|
+
await delay(100);
|
|
30
|
+
expect(filter.dataFrame!.filter.trueCount, 7);
|
|
31
|
+
expect(filter.dataFrame!.filter.get(2), false);
|
|
32
|
+
filter.bioFilter!.substructure = '/meI';
|
|
33
|
+
await delay(100);
|
|
34
|
+
expect(filter.dataFrame!.filter.trueCount, 0);
|
|
35
|
+
filter.bioFilter!.substructure = 'meI-hHis';
|
|
36
|
+
(filter.bioFilter! as SeparatorFilter).separatorInput.value = '-';
|
|
37
|
+
await delay(100);
|
|
38
|
+
expect(filter.dataFrame!.filter.trueCount, 7);
|
|
39
|
+
expect(filter.dataFrame!.filter.get(2), false);
|
|
40
|
+
});
|
|
41
|
+
|
|
42
|
+
test('helm', async () => {
|
|
43
|
+
const helm = await readDataframe('tests/filter_HELM.csv');
|
|
44
|
+
const helmTableView = grok.shell.addTableView(helm);
|
|
45
|
+
const filter = new BioSubstructureFilter();
|
|
46
|
+
await grok.data.detectSemanticTypes(helm);
|
|
47
|
+
filter.attach(helm);
|
|
48
|
+
(filter.bioFilter! as HelmFilter).helmSubstructure = 'PEPTIDE1{C}$$$$V2.0';
|
|
49
|
+
filter.bioFilter!.onChanged.next();
|
|
50
|
+
await delay(1000);
|
|
51
|
+
expect(filter.dataFrame!.filter.trueCount, 2);
|
|
52
|
+
expect(filter.dataFrame!.filter.get(0), true);
|
|
53
|
+
expect(filter.dataFrame!.filter.get(3), true);
|
|
54
|
+
(filter.bioFilter! as HelmFilter).helmSubstructure = 'PEPTIDE1{A.C}$$$$V2.0';
|
|
55
|
+
filter.bioFilter!.onChanged.next();
|
|
56
|
+
await delay(100);
|
|
57
|
+
expect(filter.dataFrame!.filter.trueCount, 1);
|
|
58
|
+
expect(filter.dataFrame!.filter.get(3), true);
|
|
59
|
+
helmTableView.close();
|
|
60
|
+
});
|
|
61
|
+
});
|
|
@@ -1,6 +1,8 @@
|
|
|
1
|
-
import * as DG from 'datagrok-api/dg';
|
|
2
1
|
import * as grok from 'datagrok-api/grok';
|
|
3
|
-
import
|
|
2
|
+
import * as ui from 'datagrok-api/ui';
|
|
3
|
+
import * as DG from 'datagrok-api/dg';
|
|
4
|
+
import * as bio from '@datagrok-libraries/bio';
|
|
5
|
+
|
|
4
6
|
|
|
5
7
|
export function generateManySequences(): DG.Column[] {
|
|
6
8
|
let columns: DG.Column[] = [];
|
|
@@ -19,10 +21,10 @@ export function generateLongSequence(): DG.Column[] {
|
|
|
19
21
|
|
|
20
22
|
export function setTagsMacromolecule(col: DG.Column) {
|
|
21
23
|
col.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
22
|
-
col.setTag(
|
|
23
|
-
col.setTag(
|
|
24
|
-
col.setTag(
|
|
25
|
-
col.setTag(
|
|
24
|
+
col.setTag(DG.TAGS.UNITS, bio.NOTATION.SEPARATOR);
|
|
25
|
+
col.setTag(bio.TAGS.aligned, bio.ALIGNMENT.SEQ_MSA);
|
|
26
|
+
col.setTag(bio.TAGS.alphabet, bio.ALPHABET.UN);
|
|
27
|
+
col.setTag(bio.TAGS.separator, '/');
|
|
26
28
|
return col;
|
|
27
29
|
}
|
|
28
30
|
|
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
import * as grok from 'datagrok-api/grok';
|
|
2
|
-
import * as ui from 'datagrok-api/ui';
|
|
3
2
|
import * as DG from 'datagrok-api/dg';
|
|
3
|
+
import * as ui from 'datagrok-api/ui';
|
|
4
|
+
|
|
5
|
+
import {printLeftOrCentered, DrawStyle} from '@datagrok-libraries/bio/src/utils/cell-renderer';
|
|
4
6
|
import * as bio from '@datagrok-libraries/bio';
|
|
5
7
|
import * as C from './constants';
|
|
6
8
|
|
|
@@ -44,7 +46,7 @@ export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
|
|
|
44
46
|
}
|
|
45
47
|
|
|
46
48
|
onMouseMove(gridCell: DG.GridCell, e: MouseEvent): void {
|
|
47
|
-
if (gridCell.cell.column.getTag(bio.TAGS.aligned) !==
|
|
49
|
+
if (gridCell.cell.column.getTag(bio.TAGS.aligned) !== bio.ALIGNMENT.SEQ_MSA)
|
|
48
50
|
return;
|
|
49
51
|
|
|
50
52
|
const maxLengthWordsSum = gridCell.cell.column.temp['bio-sum-maxLengthWords'];
|